Merge pull request #211 from CustomProgrammingSolutions/bugfix-empty-wave-features

[MRG] Fix issues with feature extraction and empty wav files
persephone-tools · Oct 30, 2018 · a28362a · a28362a
2 parents 92a6881 + b017278
commit a28362a
Show file tree

Hide file tree

Showing 4 changed files with 40 additions and 1 deletion.
diff --git a/changelog.md b/changelog.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - `CorpusReader.train_batch_gen` raises StopIteration instead of returning None if no data can be generated.
 - Decoding from saved model is now possible for arbitrary Tensorflow model topologies that have the same input and output structure via named arguments that specify where input and output to the model occur.
 - RNN CTC model class now accepts `pathlib.Path` for directory argument
+- Empty wave files no longer cause an unhandled crash on attempted feature extraction; a `PersephoneException` asking for the file to be removed from the corpus is raised instead.
 
 ## [0.3.2]
 

diff --git a/persephone/preprocess/feat_extract.py b/persephone/preprocess/feat_extract.py
@@ -4,6 +4,8 @@
 import os
 from pathlib import Path
 import subprocess
+from typing import Union
+import wave
 
 import numpy as np
 import python_speech_features
@@ -14,6 +16,12 @@
 
 logger = logging.getLogger(__name__) #type: ignore
 
+def empty_wav(wav_path: Union[Path, str]) -> bool:
+    """Return True if the wav file at `wav_path` contains zero audio frames."""
+    with wave.open(str(wav_path), 'rb') as wav_f:
+        return wav_f.getnframes() == 0
+
+
 def extract_energy(rate, sig):
     """ Extracts the energy of frames. """
 
@@ -147,6 +155,8 @@ def all_wavs_processed() -> bool:
         logger.info("Preparing %s features for %s", feat_type, filename)
         path = os.path.join(dirname, filename)
         if path.endswith(".wav"):
+            if empty_wav(path):
+                raise PersephoneException("Can't extract features for {} since it is an empty WAV file. Remove it from the corpus.".format(path))
             if feat_type == "fbank":
                 fbank(path)
             elif feat_type == "fbank_and_pitch":

diff --git a/persephone/tests/test_feature_extraction.py b/persephone/tests/test_feature_extraction.py
@@ -0,0 +1,27 @@
+import pytest
+def test_empty_wave(tmp_path, create_note_sequence, make_wav):
+    """Test that empty_wav() is True for a wav made from no data and False for one made from notes"""
+    from persephone.preprocess.feat_extract import empty_wav
+    wavs_dir = tmp_path / "audio"
+    wavs_dir.mkdir()
+    no_data = []
+    empty_wav_path = wavs_dir / "empty.wav"
+    make_wav(no_data, str(empty_wav_path))
+    assert empty_wav(empty_wav_path)
+
+    data_a_b = create_note_sequence(notes=["A","B"])
+    wav_test1 = wavs_dir / "test1.wav"
+    make_wav(data_a_b, str(wav_test1))
+    assert not empty_wav(wav_test1)
+
+def test_empty_wave_skipped(tmp_path, make_wav):
+    """Test that from_dir() raises PersephoneException when the directory holds an empty wave file."""
+    from persephone.preprocess import feat_extract
+    from persephone.exceptions import PersephoneException
+    wavs_dir = tmp_path / "audio"
+    wavs_dir.mkdir()
+    no_data = []
+    empty_wav_path = wavs_dir / "empty.wav"
+    make_wav(no_data, str(empty_wav_path))
+    with pytest.raises(PersephoneException):
+        feat_extract.from_dir(wavs_dir, "fbank")
diff --git a/test_requirements.txt b/test_requirements.txt
@@ -1,5 +1,6 @@
 tox
+pluggy>0.7
 pylint>1.8
-pytest
+pytest>=3.9
 pytest-cov
 mypy>=0.6