Merge pull request #17 from letuananh/dev

refactor media module + add test_data (audio + transcription)
neocl · May 11, 2021 · 9a2fbdd · 9a2fbdd
2 parents 1b6a529 + 4500007
commit 9a2fbdd
Show file tree

Hide file tree

Showing 11 changed files with 966 additions and 23 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
+.idea/
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

diff --git a/demo_media.py b/demo_media.py
@@ -1,19 +1,37 @@
-from pathlib import Path
+import os
 from speach import media
 from speach import elan
+from chirptext import chio
 
+# -----------------------------------------------------------------------------
+# create a folder to store processed data
+# -----------------------------------------------------------------------------
+# makedirs() also creates missing parent folders; exist_ok=True replaces the check-then-create step
+os.makedirs("./test_data/processed", exist_ok=True)
 
-ELAN_DIR = Path("~/Documents/ELAN")
-
-# converting a wave file into an ogg file
-media.convert(ELAN_DIR / "test.wav", ELAN_DIR / "test.ogg")
+# -----------------------------------------------------------------------------
+# converting the source ogg file into m4a format
+# (the converted file is the input of the media.cut() calls further down)
+# -----------------------------------------------------------------------------
+media.convert("./test_data/fables_01_03_aesop_64kb.ogg", "./test_data/processed/test.m4a")
 
+# -----------------------------------------------------------------------------
 # cutting audio file by timestamps
-media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_before10.ogg", to_ts="00:00:10")
-media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")
-media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
+# -----------------------------------------------------------------------------
+# each output name mirrors the timestamps passed to media.cut()
+media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_before_10.ogg", to_ts="00:00:10")
+media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_after_15.ogg", from_ts="00:00:15")
+media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
 
-# Cutting ELAN transcription
-eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
-for idx, ann in enumerate(eaf["Person1 (Utterance)"], start=1):
-    eaf.cut(ann, ELAN_DIR / f"test_person1_{idx}.ogg")
+# --------------------------------------------------------------------------------------------
+# More complex use case
+# Read an ELAN transcription file and, for every annotation in the "Story" tier:
+#    1. Cut the utterance into its own ogg file
+#    2. Write the annotation text into its own text file
+#    3. Record the annotation ID, text and audio filename as one row of a CSV file
+# --------------------------------------------------------------------------------------------
+eaf = elan.read_eaf("./test_data/fables_01_03_aesop_64kb.eaf")
+csv_rows = [["annID", "Text", "Filename"]]
+for ann in eaf["Story"]:
+    # the text file and the audio file of an annotation share the same stem
+    stem = f"test_{ann.ID}"
+    csv_rows.append([ann.ID, ann.text, f"{stem}.ogg"])
+    chio.write_file(f"./test_data/processed/{stem}.txt", ann.text)
+    eaf.cut(ann, f"./test_data/processed/{stem}.ogg")
+chio.write_csv("./test_data/processed/test_sentences.csv", csv_rows)
diff --git a/speach/media.py b/speach/media.py
@@ -81,12 +81,18 @@ def _ffmpeg(*args, ffmpeg_path=None, capture_output=False, text=None, check=Fals
                               text=text, check=check)
     else:
         if capture_output:
-            output = subprocess.run([ffmpeg_path, *(str(x) for x in args)],
+            procinfo = subprocess.run([ffmpeg_path, *(str(x) for x in args)],
                                     stdout=subprocess.PIPE,
-                                    stderr=subprocess.DEVNULL, check=check)
+                                    stderr=subprocess.PIPE, check=check)
         else:
-            output = subprocess.run([ffmpeg_path, *(str(x) for x in args)], check=check)
-        return output.decoding(encoding='utf-8') if text else output
+            procinfo = subprocess.run([ffmpeg_path, *(str(x) for x in args)], check=check)
+        # Python < 3.7 does not support kwarg text
+        if text:
+            if procinfo.stdout:
+                procinfo.stdout = procinfo.stdout.decode(encoding='utf-8')
+            if procinfo.stderr:
+                procinfo.stderr = procinfo.stderr.decode(encoding='utf-8')
+        return procinfo
 
 
 def _norm_path(p):
@@ -126,7 +132,10 @@ def version(ffmpeg_path=None):
     >>> media.version()
     '4.2.4-1ubuntu0.1'
     """
-    output = _ffmpeg("-version", capture_output=True, text=True, ffmpeg_path=ffmpeg_path)
+    try:
+        output = _ffmpeg("-version", capture_output=True, text=True, ffmpeg_path=ffmpeg_path, check=False)
+    except FileNotFoundError:
+        return None
     version_line = output.stdout.splitlines()[0] if output and output.stdout else ''
     parts = version_line.split()
     if parts and len(parts) > 3 and parts[0] == 'ffmpeg' and parts[1] == 'version':
@@ -219,3 +228,25 @@ def convert(infile, outfile, *args, ffmpeg_path=None):
     """
     infile, outfile = _validate_args(infile, outfile)
     _ffmpeg("-i", str(infile), *args, str(outfile), ffmpeg_path=ffmpeg_path)
+
+
+def metadata(infile, *args, ffmpeg_path=None):
+    """ Read metadata of a given media file
+
+    ffmpeg is run with ``-i infile`` and its stderr output is scanned for the
+    ``title``, ``artist`` and ``album`` lines and for the ``Duration:`` line,
+    whose comma-separated ``key: value`` pairs are all collected.
+
+    :param infile: path to the media file (converted with ``str()``)
+    :param args: accepted but not used by this function
+    :param ffmpeg_path: passed through to ``_ffmpeg``
+    :returns: a dict of the names found mapped to stripped string values;
+        names that do not appear in the output are simply absent
+    """
+    _proc = _ffmpeg("-i", str(infile), capture_output=True, text=True, ffmpeg_path=ffmpeg_path)
+    # ffmpeg writes metadata to stderr instead of stdout
+    lines = _proc.stderr.splitlines() if _proc.stderr else []
+    meta = {}
+    for line in lines:
+        if line.startswith("  Duration:"):
+            # e.g. "Duration: ..., start: ..., bitrate: ..." -> one entry per pair
+            for part in line.split(","):
+                key, sep, value = part.partition(":")
+                if sep:  # ignore fragments that carry no "key: value" pair
+                    meta[key.strip()] = value.strip()
+            continue
+        for tag in ("title", "artist", "album"):
+            if line.startswith("    " + tag):
+                _, sep, value = line.partition(":")
+                if sep:  # a tag line without ":" has no value to record
+                    meta[tag] = value.strip()
+                break
+    return meta
diff --git a/test/test_media.py b/test/test_media.py
@@ -11,7 +11,7 @@
 
 import os
 import unittest
-import logging
+from pathlib import Path
 
 from speach import media
 
@@ -20,28 +20,43 @@
 # Configuration
 # -------------------------------------------------------------------------------
 
-TEST_DIR = os.path.dirname(os.path.realpath(__file__))
-
-
-def getLogger():
-    return logging.getLogger(__name__)
+# folder that contains this test module
+TEST_DIR = Path(os.path.abspath(__file__)).parent
+# sample audio, located in the test_data folder next to TEST_DIR
+TEST_OGG = TEST_DIR.parent / "./test_data/fables_01_03_aesop_64kb.ogg"
+# output of test_convert; it is re-created on every run
+TEST_WAV = TEST_DIR.parent / "./test_data/fables_01_03_aesop_64kb.wav"
 
 
 # -------------------------------------------------------------------------------
 # Tests
 # -------------------------------------------------------------------------------
 
+@unittest.skipIf(not media.version(), "ffmpeg is not available. TestMedia will be skipped! For more information see: https://ffmpeg.org")
 class TestMedia(unittest.TestCase):
+    """ Tests for speach.media; the whole class is skipped when media.version() is falsy """
 
     def test_ffmpeg_version(self):
         ffmpeg_version = media.version()
         self.assertTrue(ffmpeg_version)
+        print(f"Testing media with ffmpeg version {ffmpeg_version}")
 
     def test_locate_ffmpeg(self):
         ffmpeg_loc = media.locate_ffmpeg()
         self.assertTrue(ffmpeg_loc)
         self.assertIn('ffmpeg', ffmpeg_loc)
 
+    def test_read_metadata(self):
+        meta = media.metadata(TEST_OGG)
+        expected = {'title': 'The Cat and the Mice',
+                    'artist': 'Aesop',
+                    'album': "Aesop's Fables Volume 1",
+                    'Duration': '00:01:41.46',
+                    'start': '0.025057',
+                    'bitrate': '64 kb/s'}
+        # without this comparison the test could never fail
+        self.assertEqual(meta, expected)
+
+    def test_convert(self):
+        # remove a stale output first so that is_file() below proves convert() created it
+        if TEST_WAV.is_file():
+            TEST_WAV.unlink()
+        media.convert(TEST_OGG, TEST_WAV, "-loglevel", "error")
+        self.assertTrue(TEST_WAV.is_file())
+
 
 # -------------------------------------------------------------------------------
 # MAIN

diff --git a/test_data/.gitignore b/test_data/.gitignore
@@ -0,0 +1,2 @@
+fables_01_03_aesop_64kb.wav
+processed/
diff --git a/test_data/README.md b/test_data/README.md
@@ -0,0 +1,6 @@
+# Test data
+
+Copyright: Both audio and text are in the public domain
+
+- Audio source: https://librivox.org/aesops-fables-volume-1-fables-1-25/
+- Text source: https://www.gutenberg.org/ebooks/11339