From 9ef3831eea981e62797c692de475c38001edf115 Mon Sep 17 00:00:00 2001 From: Santiago Martinez Date: Fri, 14 Jul 2023 21:50:25 +0100 Subject: [PATCH] Added some docs for loading and saving datasets --- docs/examples/example_dataset.json | 1 + docs/examples/my_dataset.json | 1 + docs/examples/saving_and_loading.py | 52 +++++++++++++++++++++++++++++ src/soundevent/data/dataset.py | 4 +-- src/soundevent/io/datasets.py | 2 +- 5 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 docs/examples/example_dataset.json create mode 100644 docs/examples/my_dataset.json create mode 100644 docs/examples/saving_and_loading.py diff --git a/docs/examples/example_dataset.json b/docs/examples/example_dataset.json new file mode 100644 index 0000000..388468b --- /dev/null +++ b/docs/examples/example_dataset.json @@ -0,0 +1 @@ +{"info":{"id":"c1cb90ed-5486-4996-a7dd-b2d4a5de8251","name":"test_dataset","description":"A test dataset.","date_created":"2023-07-14T21:38:48.264683"},"tags":[{"id":0,"key":"species","value":"Myotis myotis"},{"id":1,"key":"sex","value":"female"},{"id":2,"key":"behaviour","value":"foraging"},{"id":3,"key":"species","value":"Eptesicus serotinus"},{"id":4,"key":"sex","value":"male"},{"id":5,"key":"behaviour","value":"social calls"}],"recordings":[{"id":"c6e43cbb-1a55-467c-9071-0774b0198969","path":"recording1.wav","duration":10.0,"channels":1,"samplerate":441000,"time_expansion":10.0,"hash":"1234567890abcdef","date":"2021-01-01","time":"21:34:56","latitude":12.345,"longitude":34.567,"tags":[0,1,2],"features":{"SNR":10.0,"ACI":0.5},"notes":[{"uuid":"8ed5b892-86cf-4f39-bb3a-c97b9f5c0f6b","message":"This is a note.","created_by":"John Doe","is_issue":false,"created_at":"2021-01-01T12:34:56"}]},{"id":"43cbf2e1-87ae-4ff8-af9e-7038e0300e39","path":"recording2.wav","duration":8.0,"channels":1,"samplerate":441000,"time_expansion":10.0,"hash":"234567890abcdef1","date":"2021-01-02","time":"19:34:56","latitude":13.345,"longitude":32.567,"tags":[3,4,5],"features":{"SNR":7.0,"ACI":0.3},"notes":[{"uuid":"429c455c-eb45-4f5e-99e6-35d06119463e","message":"Unsure about the species.","created_by":"John Doe","is_issue":false,"created_at":"2021-01-01T12:34:56"}]}]} \ No newline at end of file diff --git a/docs/examples/my_dataset.json b/docs/examples/my_dataset.json new file mode 100644 index 0000000..585cf9c --- /dev/null +++ b/docs/examples/my_dataset.json @@ -0,0 +1 @@ +{"info":{"id":"c1cb90ed-5486-4996-a7dd-b2d4a5de8251","name":"test_dataset","description":"A test dataset.","date_created":"2023-07-14T21:49:35.173942"},"tags":[{"id":0,"key":"species","value":"Myotis myotis"},{"id":1,"key":"sex","value":"female"},{"id":2,"key":"behaviour","value":"foraging"},{"id":3,"key":"species","value":"Eptesicus serotinus"},{"id":4,"key":"sex","value":"male"},{"id":5,"key":"behaviour","value":"social calls"}],"recordings":[{"id":"c6e43cbb-1a55-467c-9071-0774b0198969","path":"recording1.wav","duration":10.0,"channels":1,"samplerate":441000,"time_expansion":10.0,"hash":"1234567890abcdef","date":"2021-01-01","time":"21:34:56","latitude":12.345,"longitude":34.567,"tags":[0,1,2],"features":{"SNR":10.0,"ACI":0.5},"notes":[{"uuid":"8ed5b892-86cf-4f39-bb3a-c97b9f5c0f6b","message":"This is a note.","created_by":"John Doe","is_issue":false,"created_at":"2021-01-01T12:34:56"}]},{"id":"43cbf2e1-87ae-4ff8-af9e-7038e0300e39","path":"recording2.wav","duration":8.0,"channels":1,"samplerate":441000,"time_expansion":10.0,"hash":"234567890abcdef1","date":"2021-01-02","time":"19:34:56","latitude":13.345,"longitude":32.567,"tags":[3,4,5],"features":{"SNR":7.0,"ACI":0.3},"notes":[{"uuid":"429c455c-eb45-4f5e-99e6-35d06119463e","message":"Unsure about the species.","created_by":"John Doe","is_issue":false,"created_at":"2021-01-01T12:34:56"}]}]} \ No newline at end of file diff --git a/docs/examples/saving_and_loading.py b/docs/examples/saving_and_loading.py new file mode 100644 index 0000000..6b48bd2 --- /dev/null +++ b/docs/examples/saving_and_loading.py @@ -0,0 +1,52 @@ +""" +# Saving and Loading data + +In `soundevent`, we use the **Acoustic Objects Exchange Format** (**AOEF**) for +storing and exchanging audio objects. **AOEF** is a JSON-based format +specifically designed to standardize the representation of bioacoustic data, +enabling effective sharing and collaboration among researchers. + +To demonstrate how to save and load data in **AOEF** format, we provide +examples below: + +""" + +# %% +# ## Loading Datasets +# Suppose we have an example dataset stored in the **AOEF** format. The dataset +# is stored as a text file following the JSON structure. To view the contents +# of the file, you can use the following code. + +import json +from pathlib import Path + +dataset_path = Path("example_dataset.json") +with open(dataset_path) as file: + dataset_contents = json.load(file) + +print(json.dumps(dataset_contents, indent=4)) + +# %% +# However, using the loading functions provided by the `soundevent` package, +# you can directly load the data into Python and obtain a `Dataset` object +# defined in the `soundevent.data` module: + +from soundevent import io + +dataset = io.load_dataset(dataset_path) +print(repr(dataset)) + +# %% +# By using the `load_dataset` function, you can access and analyze the dataset +# with all its recordings and related objects structured in a standardized and +# manageable way. + +recording = dataset.recordings[0] +print(f"First recording: {recording!r}") +print(f"Recording tags: {recording.tags}") + +# %% +# If you have your own dataset, you can save it to a file using the following +# code: + +io.save_dataset(dataset, "my_dataset.json") diff --git a/src/soundevent/data/dataset.py b/src/soundevent/data/dataset.py index 552d6e0..604d7ed 100644 --- a/src/soundevent/data/dataset.py +++ b/src/soundevent/data/dataset.py @@ -62,7 +62,7 @@ class Dataset(BaseModel): """Datasets.""" - id: UUID = Field(default_factory=uuid4) + id: UUID = Field(default_factory=uuid4, repr=False) """The unique identifier of the dataset.""" name: str @@ -71,5 +71,5 @@ class Dataset(BaseModel): description: Optional[str] = None """A description of the dataset.""" - recordings: List[Recording] = Field(default_factory=list) + recordings: List[Recording] = Field(default_factory=list, repr=False) """List of recordings associated with the dataset.""" diff --git a/src/soundevent/io/datasets.py b/src/soundevent/io/datasets.py index 989cb9f..96166b9 100644 --- a/src/soundevent/io/datasets.py +++ b/src/soundevent/io/datasets.py @@ -285,7 +285,7 @@ def save_dataset_json_format( recordings.append( RecordingObject( id=recording.id, - path=recording.path.relative_to(audio_dir), + path=recording.path.resolve().relative_to(audio_dir), duration=recording.duration, channels=recording.channels, samplerate=recording.samplerate,