Merge branch 'master' into Pedro/jamendo

mir-dataset-loaders · Sep 3, 2021 · 5a69b50 · 5a69b50
2 parents ed428b3 + 6e087cf
commit 5a69b50
Show file tree

Hide file tree

Showing 107 changed files with 378,153 additions and 432 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -35,7 +35,16 @@
 
 
 # -- Mock dependencies -------------------------------------------------------
-autodoc_mock_imports = ["librosa", "numpy", "jams", "pretty_midi", "DALI", "music21"]
+autodoc_mock_imports = [
+    "librosa",
+    "numpy",
+    "jams",
+    "pretty_midi",
+    "DALI",
+    "music21",
+    "yaml",
+    "scipy",
+]
 
 
 # # -- General configuration ---------------------------------------------------

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,6 +1,7 @@
 sphinx-togglebutton
 dali-dataset>=1.0
 jams
+chardet
 librosa>=0.7.0
 tqdm
 sphinx==3.4.0
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
@@ -279,7 +279,7 @@ You may find these examples useful as references:
     - `A dataset with a custom download function <https://github.com/mir-dataset-loaders/mirdata/blob/master/mirdata/datasets/maestro.py#L257>`_
     - `A dataset with a remote index <https://github.com/mir-dataset-loaders/mirdata/blob/master/mirdata/datasets/acousticbrainz_genre.py>`_
     - `A dataset with extra dependencies <https://github.com/mir-dataset-loaders/mirdata/blob/master/mirdata/datasets/dali.py>`_
-    - `A dataset which has multitracks <https://github.com/mir-dataset-loaders/mirdata/blob/master/mirdata/datasets/phenicx_anechoic.py>`_
+    - `A dataset with multitracks <https://github.com/mir-dataset-loaders/mirdata/blob/master/mirdata/datasets/phenicx_anechoic.py>`_
 
 For many more examples, see the `datasets folder <https://github.com/mir-dataset-loaders/mirdata/tree/master/mirdata/datasets>`_.
 
@@ -474,7 +474,10 @@ In general, if the new library will be useful for many future datasets, we will
 dependency. If it is specific to one dataset, we will add it as an optional dependency.
 
 To add an optional dependency, add the dataset name as a key in `extras_require` in setup.py,
-and list any additional dependencies. When importing these optional dependencies in the dataset
+and list any additional dependencies. Additionally, mock the dependencies in docs/conf.py
+by adding them to the `autodoc_mock_imports` list.
+
+When importing these optional dependencies in the dataset
 module, use a try/except clause and log instructions if the user hasn't installed the extra
 requirements.
 
@@ -685,9 +688,10 @@ We use the following libraries for loading data from files:
 +-------------------------+-------------+
 | jams                    | jams        |
 +-------------------------+-------------+
+| yaml                    | pyyaml      |
++-------------------------+-------------+
 
-If a file format needed for a dataset is not included in this list, please see the extra dependencies section.
-# TODO
+If a file format needed for a dataset is not included in this list, please see `this section <extra_dependencies_>`_.
 
 Track Attributes
 ----------------

diff --git a/docs/source/contributing_examples/example.py b/docs/source/contributing_examples/example.py
@@ -134,15 +134,20 @@ def composer(self) -> Optional[str]:
     # -- series data loaded from a file should be a cached property
     @core.cached_property
     def annotation(self) -> Optional[annotations.EventData]:
-        """output type: description of output"""
         return load_annotation(self.annotation_path)
 
     # -- `audio` will behave like an attribute, but it will only be loaded
     # -- when someone accesses it and it won't be stored. By default, we make
     # -- any memory heavy information (like audio) properties
     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """(np.ndarray, float): DESCRIPTION audio signal, sample rate"""
+        """The track's audio
+
+        Returns:
+            * np.ndarray - audio signal
+            * float - sample rate
+
+        """
         return load_audio(self.audio_path)
 
     # -- we use the to_jams function to convert all the annotations in the JAMS format.
@@ -172,26 +177,44 @@ class MultiTrack(core.MultiTrack):
     Attributes:
         mtrack_id (str): track id
         tracks (dict): {track_id: Track}
-        track_audio_attribute (str): the name of the attribute of Track which
+        track_audio_property (str): the name of the attribute of Track which
             returns the audio to be mixed
         # -- Add any of the dataset specific attributes here
 
     Cached Properties:
         annotation (EventData): a description of this annotation
 
     """
-    def __init__(self, mtrack_id, data_home):
-        self.mtrack_id = mtrack_id
-        self._data_home = data_home
-        # these three attributes below must have exactly these names
-        self.track_ids = [...] # define which track_ids should be part of the multitrack
-        self.tracks = {t: Track(t, self._data_home) for t in self.track_ids}
-        self.track_audio_property = "audio" # the property of Track which returns the relevant audio file for mixing
+    def __init__(
+        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
+    ):
+        # -- this sets the following attributes:
+        # -- * mtrack_id
+        # -- * _dataset_name
+        # -- * _data_home
+        # -- * _multitrack_paths
+        # -- * _metadata
+        # -- * _track_class
+        # -- * _index
+        # -- * track_ids
+        super().__init__(
+            mtrack_id=mtrack_id,
+            data_home=data_home,
+            dataset_name=dataset_name,
+            index=index,
+            track_class=track_class,
+            metadata=metadata,
+        )
 
         # -- optionally add any multitrack specific attributes here
         self.mix_path = ...  # this can be called whatever makes sense for the datasets
         self.annotation_path = ...
 
+    # If you want to support multitrack mixing in this dataset, set this property
+    @property
+    def track_audio_property(self):
+        return "audio"  # the attribute of Track, e.g. Track.audio, which returns the audio to mix
+
     # -- multitracks can optionally have mix-level cached properties and properties
     @core.cached_property
     def annotation(self) -> Optional[annotations.EventData]:
@@ -200,7 +223,13 @@ def annotation(self) -> Optional[annotations.EventData]:
 
     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """(np.ndarray, float): DESCRIPTION audio signal, sample rate"""
+        """The multitrack's audio
+
+        Returns:
+            * np.ndarray - audio signal
+            * float - sample rate
+
+        """
         return load_audio(self.audio_path)
 
     # -- multitrack classes are themselves Tracks, and also need a to_jams method

diff --git a/docs/source/contributing_examples/test_example.py b/docs/source/contributing_examples/test_example.py
@@ -1,4 +1,7 @@
+"""Tests for example dataset
+"""
 import numpy as np
+import pytest
 
 from mirdata import annotations
 from mirdata.datasets import example

diff --git a/docs/source/mirdata.rst b/docs/source/mirdata.rst
@@ -281,6 +281,14 @@ saraga_hindustani
    :inherited-members:
 
 
+slakh
+^^^^^^^^^^^^^^^^^
+
+.. automodule:: mirdata.datasets.slakh
+   :members:
+   :inherited-members:
+
+
 tinysol
 ^^^^^^^
 

diff --git a/docs/source/table.rst b/docs/source/table.rst
@@ -331,6 +331,15 @@
      - .. image:: https://licensebuttons.net/l/by-nc-sa/4.0/80x15.png
           :target: https://creativecommons.org/licenses/by-nc-sa/4.0
 
+   * - Slakh
+     - - multitrack audio: ✅
+       - annotations: ✅
+     - - Notes :ref:`notes`
+       - Instruments :ref:`instruments`
+     - 1710
+     - .. image:: https://licensebuttons.net/l/by/4.0/80x15.png
+          :target: https://creativecommons.org/licenses/by/4.0/
+
    * - Tinysol
      - - audio: ✅
        - annotations: ✅