Merge pull request #56 from sensein/dev_0.2.0

Release 0.2.0
sensein · Jun 10, 2024 · ef4ee2e · ef4ee2e
2 parents 9cfb5e3 + a3c310d
commit ef4ee2e
Show file tree

Hide file tree

Showing 7 changed files with 130 additions and 231 deletions.
diff --git a/.gitignore b/.gitignore
@@ -106,7 +106,7 @@ ipython_config.py
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-# poetry.lock
+poetry.lock
 
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
@@ -166,4 +166,4 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
 
-data/
+data/
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,8 +42,6 @@ pydantic = "^2.7.3"
 accelerate = "^0.29.3"
 huggingface-hub = "^0.23.3"
 praat-parselmouth = "^0.4.3"
-iso-639 = {git = "https://github.com/noumar/iso639.git", tag = "0.4.5"}
-opensmile = "^2.5.0"
 audiomentations = "^0.35.0"
 torch-audiomentations = "^0.11.1"
 sentence-transformers = "^2.7.0"

diff --git a/src/senselab/audio/tasks/features_extraction/opensmile.py b/src/senselab/audio/tasks/features_extraction/opensmile.py
@@ -1,59 +1,70 @@
-"""This module contains functions for extracting openSMILE features."""
+# ruff: noqa
+'''"""This module contains functions for extracting openSMILE features."""
 
-from typing import Any, Dict
+
+from typing import Any, Dict, List
 
 import opensmile
 
-from senselab.utils.tasks.input_output import (
-    _from_dict_to_hf_dataset,
-    _from_hf_dataset_to_dict,
-)
+from senselab.utils.data_structures.audio import Audio
+
+
+class OpenSmileFeatureExtractorFactory:
+    """A factory for managing openSMILE feature extractors."""
+
+    _extractors: Dict[str, opensmile.Smile] = {}
+
+    @classmethod
+    def get_opensmile_extractor(cls, feature_set: str, feature_level: str) -> opensmile.Smile:
+        """Get or create an openSMILE feature extractor.
 
+        Args:
+            feature_set (str): The openSMILE feature set.
+            feature_level (str): The openSMILE feature level.
 
-def extract_feats_from_dataset(
-    dataset: Dict[str, Any],
-    audio_column: str = "audio",
+        Returns:
+            opensmile.Smile: The openSMILE feature extractor.
+        """
+        key = f"{feature_set}-{feature_level}"
+        if key not in cls._extractors:
+            cls._extractors[key] = opensmile.Smile(
+                feature_set=opensmile.FeatureSet[feature_set],
+                feature_level=opensmile.FeatureLevel[feature_level],
+            )
+        return cls._extractors[key]
+
+def extract_feats_from_audios(
+    audios: List[Audio],
     feature_set: str = "eGeMAPSv02",
     feature_level: str = "Functionals",
-) -> Dict[str, Any]:
-    """Apply feature extraction across a dataset of audio files.
+) -> List[Dict[str, Any]]:
+    """Apply feature extraction across a list of audio files.
+
+    Args:
+        audios (List[Audio]): The list of audio objects to extract features from.
+        feature_set (str): The openSMILE feature set (default is "eGeMAPSv02").
+        feature_level (str): The openSMILE feature level (default is "Functionals").
 
-    Low-level descriptors are extracted on 20ms windows with a hop of 10ms.
-    Functional descriptors are extracted on the entire audio signal.
+    Returns:
+        List[Dict[str, Any]]: The list of feature dictionaries for each audio.
     """
+    def _extract_feats_from_audio(sample: Audio, smile: opensmile.Smile) -> Dict[str, Any]:
+        """Extract features from a single audio sample using openSMILE.
 
-    def _load_opensmile_model(
-        feature_set: str, feature_level: str
-    ) -> opensmile.Smile:
-        """Load an openSMILE configuration to extract audio features."""
-        smile = opensmile.Smile(
-            feature_set=opensmile.FeatureSet[feature_set],
-            feature_level=opensmile.FeatureLevel[feature_level],
-        )
-        return smile
-
-    def _extract_feats_from_row(
-        sample: Dict[str, Any], smile: opensmile.Smile, audio_column: str
-    ) -> Dict[str, Any]:
-        """Extract features from a single audio sample using openSMILE."""
-        # Extracting audio data
-        audio_array = sample[audio_column]["array"]
-        sampling_rate = sample[audio_column]["sampling_rate"]
-
-        # Processing the audio sample to compute features
+        Args:
+            sample (Audio): The audio object.
+            smile (opensmile.Smile): The openSMILE feature extractor.
+
+        Returns:
+            Dict[str, Any]: The extracted features as a dictionary.
+        """
+        audio_array = sample.waveform.squeeze().numpy()
+        sampling_rate = sample.sampling_rate
         sample_features = smile.process_signal(audio_array, sampling_rate)
-        return sample_features.to_dict("list")
-
-    hf_dataset = _from_dict_to_hf_dataset(dataset)
-    unnecessary_columns = [
-        col for col in hf_dataset.column_names if col != audio_column
-    ]
-    hf_dataset = hf_dataset.remove_columns(unnecessary_columns)
-
-    smile = _load_opensmile_model(feature_set, feature_level)
-    features_dataset = hf_dataset.map(
-        _extract_feats_from_row,
-        fn_kwargs={"smile": smile, "audio_column": audio_column},
-    )
-    features_dataset = features_dataset.remove_columns([audio_column])
-    return _from_hf_dataset_to_dict(features_dataset)
+        # Convert to a dictionary with float values and return it
+        return {k: v[0] if isinstance(v, list) and len(v) == 1 else v for k, v in sample_features.to_dict("list").items()}
+
+    smile = OpenSmileFeatureExtractorFactory.get_opensmile_extractor(feature_set, feature_level)
+    features = [_extract_feats_from_audio(audio, smile) for audio in audios]
+    return features
+'''
diff --git a/src/senselab/audio/tasks/features_extraction/opensmile_pydra.py b/src/senselab/audio/tasks/features_extraction/opensmile_pydra.py
@@ -1,9 +1,11 @@
-"""This module defines a pydra API for praat_parselmouth features extraction."""
+# ruff: noqa
+'''"""This module defines a pydra API for openSMILE feature extraction."""
 
 import pydra
 
 from senselab.audio.tasks.features_extraction.opensmile import (
-    extract_feats_from_dataset,
+    extract_feats_from_audios,
 )
 
-extract_feats_from_dataset_pt = pydra.mark.task(extract_feats_from_dataset)
+extract_feats_from_audios_pt = pydra.mark.task(extract_feats_from_audios)
+'''