Skip to content

Commit

Permalink
Merge pull request #17 from letuananh/dev
Browse files Browse the repository at this point in the history
refactor media module + add test_data (audio + transcription)
  • Loading branch information
letuananh committed May 11, 2021
2 parents 1b6a529 + 4500007 commit 9a2fbdd
Show file tree
Hide file tree
Showing 11 changed files with 966 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
42 changes: 30 additions & 12 deletions demo_media.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,37 @@
from pathlib import Path
import os
from speach import media
from speach import elan
from chirptext import chio

# -----------------------------------------------------------------------------
# create a folder to store processed data
# -----------------------------------------------------------------------------
if not os.path.isdir("./test_data/processed"):
os.mkdir("./test_data/processed")

ELAN_DIR = Path("~/Documents/ELAN")

# converting a wave file into an ogg file
media.convert(ELAN_DIR / "test.wav", ELAN_DIR / "test.ogg")
# -----------------------------------------------------------------------------
# converting the source ogg file into m4a format
# -----------------------------------------------------------------------------
media.convert("./test_data/fables_01_03_aesop_64kb.ogg", "./test_data/processed/test.m4a")

# -----------------------------------------------------------------------------
# cutting audio file by timestamps
media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_before10.ogg", to_ts="00:00:10")
media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")
media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
# -----------------------------------------------------------------------------
media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_before_10.ogg", to_ts="00:00:10")
media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_after_10.ogg", from_ts="00:00:15")
media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")

# Cutting ELAN transcription
eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
for idx, ann in enumerate(eaf["Person1 (Utterance)"], start=1):
eaf.cut(ann, ELAN_DIR / f"test_person1_{idx}.ogg")
# --------------------------------------------------------------------------------------------
# More complex use case
# Read an ELAN transcription file and:
# 1. Cut all utterances into separate ogg files
# 2. Write annotation text into separate text files
# 3. Write all utterances into a CSV file with annotation IDs and individual audio filenames
# --------------------------------------------------------------------------------------------
eaf = elan.read_eaf("./test_data/fables_01_03_aesop_64kb.eaf")
csv_rows = [["annID", "Text", "Filename"]]
for ann in eaf["Story"]:
csv_rows.append([ann.ID, ann.text, f"test_{ann.ID}.ogg"])
chio.write_file(f"./test_data/processed/test_{ann.ID}.txt", ann.text)
eaf.cut(ann, f"./test_data/processed/test_{ann.ID}.ogg")
chio.write_csv("./test_data/processed/test_sentences.csv", csv_rows)
41 changes: 36 additions & 5 deletions speach/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,18 @@ def _ffmpeg(*args, ffmpeg_path=None, capture_output=False, text=None, check=Fals
text=text, check=check)
else:
if capture_output:
output = subprocess.run([ffmpeg_path, *(str(x) for x in args)],
procinfo = subprocess.run([ffmpeg_path, *(str(x) for x in args)],
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL, check=check)
stderr=subprocess.PIPE, check=check)
else:
output = subprocess.run([ffmpeg_path, *(str(x) for x in args)], check=check)
return output.decoding(encoding='utf-8') if text else output
procinfo = subprocess.run([ffmpeg_path, *(str(x) for x in args)], check=check)
# Python < 3.7 does not support kwarg text
if text:
if procinfo.stdout:
procinfo.stdout = procinfo.stdout.decode(encoding='utf-8')
if procinfo.stderr:
procinfo.stderr = procinfo.stderr.decode(encoding='utf-8')
return procinfo


def _norm_path(p):
Expand Down Expand Up @@ -126,7 +132,10 @@ def version(ffmpeg_path=None):
>>> media.version()
'4.2.4-1ubuntu0.1'
"""
output = _ffmpeg("-version", capture_output=True, text=True, ffmpeg_path=ffmpeg_path)
try:
output = _ffmpeg("-version", capture_output=True, text=True, ffmpeg_path=ffmpeg_path, check=False)
except FileNotFoundError:
return None
version_line = output.stdout.splitlines()[0] if output and output.stdout else ''
parts = version_line.split()
if parts and len(parts) > 3 and parts[0] == 'ffmpeg' and parts[1] == 'version':
Expand Down Expand Up @@ -219,3 +228,25 @@ def convert(infile, outfile, *args, ffmpeg_path=None):
"""
infile, outfile = _validate_args(infile, outfile)
_ffmpeg("-i", str(infile), *args, str(outfile), ffmpeg_path=ffmpeg_path)


def metadata(infile, *args, ffmpeg_path=None):
    """Read metadata of a given media file.

    Runs ``ffmpeg -i <infile>`` through ``_ffmpeg`` and scans the captured
    stderr text line by line for the title, artist and album tags and for
    the ``Duration:`` line.

    Arguments:
        infile: Path to the media file; it is converted with ``str()``.
        *args: Accepted but currently not forwarded to ffmpeg.
        ffmpeg_path: Passed through unchanged to ``_ffmpeg``.

    Returns:
        A dict that may contain the keys ``title``, ``artist`` and ``album``,
        plus one entry per ``key: value`` pair found on the ``Duration:``
        line. Keys that are not found in the output are simply absent.
    """
    _proc = _ffmpeg("-i", str(infile), capture_output=True, text=True, ffmpeg_path=ffmpeg_path)
    # ffmpeg output metadata to stderr instead of stdout.
    # Guard against an empty/None stderr so that no output yields an empty dict.
    lines = (_proc.stderr or "").splitlines()
    meta = {}
    for line in lines:
        if line.startswith(" title"):
            meta["title"] = line.split(":", maxsplit=1)[1].strip()
        elif line.startswith(" artist"):
            meta["artist"] = line.split(":", maxsplit=1)[1].strip()
        elif line.startswith(" album"):
            meta["album"] = line.split(":", maxsplit=1)[1].strip()
        elif line.startswith(" Duration:"):
            # The line is a comma-separated list of "key: value" pairs; only
            # the first colon separates key from value (timestamps contain more).
            for part in line.split(","):
                key, sep, value = part.partition(":")
                if sep:
                    # Skip fragments without a colon instead of raising ValueError.
                    meta[key.strip()] = value.strip()
    return meta
27 changes: 21 additions & 6 deletions test/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import os
import unittest
import logging
from pathlib import Path

from speach import media

Expand All @@ -20,28 +20,43 @@
# Configuration
# -------------------------------------------------------------------------------

TEST_DIR = os.path.dirname(os.path.realpath(__file__))


def getLogger():
return logging.getLogger(__name__)
TEST_DIR = Path(os.path.abspath(__file__)).parent
TEST_OGG = TEST_DIR.parent / "./test_data/fables_01_03_aesop_64kb.ogg"
TEST_WAV = TEST_DIR.parent / "./test_data/fables_01_03_aesop_64kb.wav"


# -------------------------------------------------------------------------------
# Tests
# -------------------------------------------------------------------------------

@unittest.skipIf(not media.version(), "ffmpeg is not available. TestMedia will be skipped! For more information see: https://ffmpeg.org")
class TestMedia(unittest.TestCase):
    """Tests for speach.media; skipped as a whole when media.version() is falsy."""

    def test_ffmpeg_version(self):
        """media.version() reports a non-empty version."""
        ffmpeg_version = media.version()
        self.assertTrue(ffmpeg_version)
        print(f"Testing media with ffmpeg version {ffmpeg_version}")

    def test_locate_ffmpeg(self):
        """media.locate_ffmpeg() returns a location that mentions ffmpeg."""
        ffmpeg_loc = media.locate_ffmpeg()
        self.assertTrue(ffmpeg_loc)
        self.assertIn('ffmpeg', ffmpeg_loc)

    def test_read_metadata(self):
        """media.metadata() returns the expected tags for the sample ogg file."""
        meta = media.metadata(TEST_OGG)
        expected = {'title': 'The Cat and the Mice',
                    'artist': 'Aesop',
                    'album': "Aesop's Fables Volume 1",
                    'Duration': '00:01:41.46',
                    'start': '0.025057',
                    'bitrate': '64 kb/s'}
        # Without this assertion the test passes regardless of what metadata() returns.
        self.assertEqual(expected, meta)

    def test_convert(self):
        """media.convert() creates the wav file from the sample ogg file."""
        # Remove any output left by a previous run so the final check is meaningful.
        if TEST_WAV.is_file():
            TEST_WAV.unlink()
        media.convert(TEST_OGG, TEST_WAV, "-loglevel", "error")
        self.assertTrue(TEST_WAV.is_file())


# -------------------------------------------------------------------------------
# MAIN
Expand Down
2 changes: 2 additions & 0 deletions test_data/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fables_01_03_aesop_64kb.wav
processed/
6 changes: 6 additions & 0 deletions test_data/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test data

Copyright: Both audio and text are in the public domain

- Audio source: https://librivox.org/aesops-fables-volume-1-fables-1-25/
- Text source: https://www.gutenberg.org/ebooks/11339

0 comments on commit 9a2fbdd

Please sign in to comment.