Merge pull request #12 from letuananh/dev

update media & ELAN APIs
neocl · Apr 30, 2021 · 62f64a8 · 62f64a8
2 parents b34849d + 38055a3
commit 62f64a8
Show file tree

Hide file tree

Showing 12 changed files with 255 additions and 40 deletions.
diff --git a/README.md b/README.md
@@ -12,6 +12,7 @@ Main functions are:
 - Manipulating [ELAN](https://archive.mpi.nl/tla/elan/download) transcription files directly in ELAN Annotation Format (eaf)
 - TIG - A human-friendly interlinear gloss format for linguistic documentation
 - Multiple storage formats (text, CSV, JSON, SQLite databases)
+- Cutting, converting, and merging audio/video files
 
 ## Useful Links
 
@@ -26,15 +27,15 @@ Main functions are:
 pip install speach
 ```
 
-## ELAN support
+## Sample codes
 
-Speach can be used to extract annotations as well as metadata from ELAN transcripts, for example:
+Speach can extract annotations and metadata from ELAN transcripts directly, for example:
 
 ``` python
 from speach import elan
 
 # Test ELAN reader function in speach
-eaf = elan.open_eaf('./test/data/test.eaf')
+eaf = elan.read_eaf('./test/data/test.eaf')
 
 # accessing tiers & annotations
 for tier in eaf:
@@ -50,4 +51,12 @@ Speach also provides command line tools for processing EAF files.
 python -m speach eaf2csv input_elan_file.eaf -o output_file_name.csv
 ```
 
+Processing media files
+
+```python
+>>> from speach import media
+>>> media.convert("~/Documents/test.wav", "~/Documents/test.ogg")
+>>> media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
+```
+
 Read [Speach documentation](https://speach.readthedocs.io/) for more information.
diff --git a/demo_batch_processing_eaf_files.py b/demo_batch_processing_eaf_files.py
@@ -8,7 +8,7 @@
     if child_file.suffix.endswith('.eaf'):
         print(child_file.name)
         c = 0
-        eaf = elan.open_eaf(child_file)
+        eaf = elan.read_eaf(child_file)
         for tier in eaf.roots:
             if tier.type_ref == 'Utterance':
                 print(f"  | {tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")

diff --git a/demo_elan.py b/demo_elan.py
@@ -1,7 +1,7 @@
 from speach import elan
 
 # read an ELAN file
-eaf = elan.open_eaf('./test/data/test.eaf')
+eaf = elan.read_eaf('./test/data/test.eaf')
 
 # accessing metadata
 print("Accessing EAF Metadata")

diff --git a/demo_media.py b/demo_media.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+from speach import media
+from speach import elan
+
+
+ELAN_DIR = Path("~/Documents/ELAN")
+
+# converting a wave file into an ogg file
+media.convert(ELAN_DIR / "test.wav", ELAN_DIR / "test.ogg")
+
+# cutting audio file by timestamps
+media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_before10.ogg", to_ts="00:00:10")
+media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")
+media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
+
+# Cutting ELAN transcription
+eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
+for idx, ann in enumerate(eaf["Person1 (Utterance)"], start=1):
+    eaf.cut(ann, ELAN_DIR / f"test_person1_{idx}.ogg")
diff --git a/docs/api_elan.rst b/docs/api_elan.rst
@@ -1,14 +1,19 @@
+.. _api_elan:
+
 ELAN module
 ===========
 
 ``speach`` supports reading and manipulating multi-tier transcriptions from ELAN directly.
+
+For common code samples for processing ELAN files, see the :ref:`tut_elan` page.
 
 .. automodule:: speach.elan
-   :members: open_eaf, parse_eaf_stream
+   :members: read_eaf, parse_eaf_stream
 
 .. autoclass:: ELANDoc
    :members:
    :member-order: groupwise
+   :exclude-members: read_eaf, parse_eaf_stream
 
 .. autoclass:: ELANTier
    :members:

diff --git a/docs/elan.rst b/docs/elan.rst
@@ -1,13 +1,17 @@
+.. _tut_elan:
+
 ELAN Recipes
 ============
 
 Common snippets for processing ELAN transcriptions with ``speach``.
 
+For an in-depth API reference, see the :ref:`api_elan` page.
+
 Open an ELAN file
 -----------------
 
     >>> from speach import elan
-    >>> eaf = elan.open_eaf('./data/test.eaf')
+    >>> eaf = elan.read_eaf('./data/test.eaf')
     >>> eaf
     <speach.elan.ELANDoc object at 0x7f67790593d0>
 
@@ -46,15 +50,26 @@ If you want to loop through the root tiers only, you can use the :code:`roots` l
 
 .. code-block:: python
 
-    eaf = elan.open_eaf('./data/test_nested.eaf')
+    eaf = elan.read_eaf('./data/test_nested.eaf')
     # accessing nested tiers
     for tier in eaf.roots:
         print(f"{tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
         for child_tier in tier.children:
             print(f"    | {child_tier.ID} | Participant: {child_tier.participant} | Type: {child_tier.type_ref}")
             for ann in child_tier.annotations:
                 print(f"    |- {ann.ID.rjust(4, ' ')}. [{ann.from_ts} -- {ann.to_ts}] {ann.text}")
-         
+
+Cutting annotations to separate audio files
+-------------------------------------------
+
+Annotations can be cut and stored into separate audio files using the :func:`speach.elan.ELANDoc.cut` method.
+
+.. code-block:: python
+
+   eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
+   for idx, ann in enumerate(eaf["Person1 (Utterance)"], start=1):
+       eaf.cut(ann, ELAN_DIR / f"test_person1_{idx}.ogg")
+                
 Converting ELAN files to CSV
 ----------------------------
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -38,7 +38,7 @@ Speach can be used to extract annotations as well as metadata from ELAN transcri
     from speach import elan
 
     # Test ELAN reader function in speach
-    eaf = elan.open_eaf('./test/data/test.eaf')
+    eaf = elan.read_eaf('./test/data/test.eaf')
 
     # accessing tiers & annotations
     for tier in eaf:
@@ -68,9 +68,9 @@ More information:
 Useful Links
 ------------
 
-- Speach documentation: https://speach.readthedocs.io/
-- Speach on PyPI: https://pypi.org/project/speach/
 - Source code: https://github.com/neocl/speach/
+- Speach on PyPI: https://pypi.org/project/speach/
+- Speach documentation: https://speach.readthedocs.io/
 
 Release Notes
 -------------

diff --git a/docs/media.rst b/docs/media.rst
@@ -14,13 +14,35 @@ Installing ffmpeg
 
 Sample code
 -----------
+
+   Just import the media module from the speach library to start using it
+
+   >>> from speach import media
+
+   converting the wave file ``test.wav`` in Documents folder into OGG format ``test.ogg``
+
+   >>> media.convert("~/Documents/test.wav", "~/Documents/test.ogg")
+
+   cutting ``test.wav`` from the beginning to 00:00:10 and write output to ``test_before10.ogg``
+
+   >>> media.cut("test.wav", "test_before10.ogg", to_ts="00:00:10")
+
+   cutting ``test.wav`` from 00:00:15 to the end of the file and write output to ``test_after15.ogg``
 
-.. code:: python
+   >>> media.cut("test.wav", ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")
+
+   cutting ``test.wav`` from 00:00:10 to 00:00:15 and write output to ``test_10-15.ogg``
+
+   >>> media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
+
+Querying ffmpeg information
+---------------------------
 
-   from speach import media
-   print(media.version())
-   print(media.locate_ffmpeg())
-   media.convert("infile.mp3", "outfile.wav")
+    >>> from speach import media
+    >>> media.version()
+    '4.2.4-1ubuntu0.1'
+    >>> media.locate_ffmpeg()
+    '/usr/bin/ffmpeg'
 
 Others
 ------

diff --git a/docs/updates.rst b/docs/updates.rst
@@ -6,6 +6,9 @@ Updates
 2021-04-30
 ----------
 
+- Added :func:`speach.elan.ELANDoc.cut` function to cut annotations to separate audio files.
+- Expand user home directory automatically when using :func:`speach.elan.read_eaf` function.
+- Module :mod:`speach.media` supports converting media files, cutting them by timestamps, and demuxer concat.
 - Package :ref:`Victoria Chua <contributors>`'s media processing code into ``speach.media`` module.
 
 2021-04-28

diff --git a/speach/elan.py b/speach/elan.py
@@ -8,7 +8,7 @@
 # :copyright: (c) 2018 Le Tuan Anh <tuananh.ke@gmail.com>
 # :license: MIT, see LICENSE for more details.
 
-
+import os
 import logging
 from collections import OrderedDict
 from collections import defaultdict as dd
@@ -20,6 +20,7 @@
 
 from .__version__ import __issue__
 from .vtt import sec2ts, ts2sec
+from .media import cut
 
 
 # ----------------------------------------------------------------------
@@ -240,6 +241,11 @@ def __init__(self, type_ref_id, participant, ID, doc=None, default_locale=None,
         self.__annotations = []
         self.__xml_node = xml_node
 
+    @property
+    def name(self):
+        """ An alias to tier's ID """
+        return self.ID
+
     @property
     def annotations(self):
         return self.__annotations
@@ -449,10 +455,27 @@ def __init__(self, **kwargs):
         self.__constraints = []
         self.__vocabs = []
         self.__roots = []
+        self.path = None
         self.__xml_root = None
         self.__xml_header_node = None
         self.__xml_time_order_node = None
 
+    def media_path(self):
+        """ Try to determine the best path to source media file """
+        mpath = self.relative_media_url
+        if os.path.isfile(mpath):
+            return mpath
+        # try to join with eaf path if possible
+        if self.path:
+            mpath = os.path.join(os.path.dirname(self.path), mpath)
+            if os.path.isfile(mpath):
+                return mpath
+        # otherwise use media_url
+        mpath = self.media_url
+        if mpath.startswith("file://"):
+            mpath = mpath[7:]
+        return mpath
+
     def annotation(self, ID):
         """ Get annotation by ID """
         return self.__ann_map.get(ID, None)
@@ -625,10 +648,47 @@ def save(self, path, encoding='utf-8', xml_declaration=None,
                                    short_empty_elements=short_empty_elements)
         chio.write_file(path, _content, encoding=encoding)
 
+    def cut(self, section, outfile, media_file=None):
+        """ Cut the source media with timestamps defined in section object 
+
+        For example, the following code cuts all annotations in tier "Tier 1" into appropriate audio files
+
+        >>> for idx, ann in enumerate(eaf["Tier 1"], start=1):
+        >>>     eaf.cut(ann, f"tier1_ann{idx}.wav")
+
+        :param section: Any object with ``from_ts`` and ``to_ts`` attributes which return TimeSlot objects
+        :param outfile: Path to output media file, must not exist or a FileExistsError will be raised
+        :param media_file: Use to specify source media file. This will override the value specified in source EAF file
+        :raises: FileExistsError, FileNotFoundError, ValueError
+        """
+        if section is None:
+            raise ValueError("Annotation object cannot be empty")
+        elif not section.from_ts or not section.to_ts:
+            raise ValueError("Annotation object must be time-alignable")
+        elif media_file is None:
+            media_file = self.media_path()
+        # verify that media_file exists
+        if not os.path.isfile(media_file):
+            raise FileNotFoundError(f"Source media file ({media_file}) could not be found")
+        cut(media_file, outfile, from_ts=section.from_ts, to_ts=section.to_ts)
+
     @classmethod
-    def open_eaf(cls, eaf_path, encoding='utf-8', *args, **kwargs):
+    def read_eaf(cls, eaf_path, encoding='utf-8', *args, **kwargs):
+        """ Read an EAF file 
+
+        >>> from speach import elan
+        >>> eaf = elan.read_eaf("myfile.eaf")
+
+        :param eaf_path: Path to existing EAF file
+        :type eaf_path: str or Path-like object
+        """
+        eaf_path = str(eaf_path)
+        if eaf_path.startswith("~"):
+            eaf_path = os.path.expanduser(eaf_path)
         with chio.open(eaf_path, encoding=encoding, *args, **kwargs) as eaf_stream:
-            return cls.parse_eaf_stream(eaf_stream)
+            _doc = cls.parse_eaf_stream(eaf_stream)
+            _doc.path = eaf_path
+            return _doc
 
     @classmethod
     def parse_eaf_stream(cls, eaf_stream):
@@ -679,5 +739,6 @@ def parse_eaf_stream(cls, eaf_stream):
         return _doc
 
 
-open_eaf = ELANDoc.open_eaf
+open_eaf = ELANDoc.read_eaf
+read_eaf = ELANDoc.read_eaf
 parse_eaf_stream = ELANDoc.parse_eaf_stream