Skip to content

Commit

Permalink
Merge pull request #12 from letuananh/dev
Browse files Browse the repository at this point in the history
update media & ELAN APIs
  • Loading branch information
letuananh committed Apr 30, 2021
2 parents b34849d + 38055a3 commit 62f64a8
Show file tree
Hide file tree
Showing 12 changed files with 255 additions and 40 deletions.
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Main functions are:
- Manipulating [ELAN](https://archive.mpi.nl/tla/elan/download) transcription files directly in ELAN Annotation Format (eaf)
- TIG - A human-friendly interlinear gloss format for linguistic documentation
- Multiple storage formats (text, CSV, JSON, SQLite databases)
- Cutting, converting, and merging audio/video files

## Useful Links

Expand All @@ -26,15 +27,15 @@ Main functions are:
pip install speach
```

## ELAN support
## Sample codes

Speach can be used to extract annotations as well as metadata from ELAN transcripts, for example:
Speach can extract annotations and metadata from ELAN transcripts directly, for example:

``` python
from speach import elan

# Test ELAN reader function in speach
eaf = elan.open_eaf('./test/data/test.eaf')
eaf = elan.read_eaf('./test/data/test.eaf')

# accessing tiers & annotations
for tier in eaf:
Expand All @@ -50,4 +51,12 @@ Speach also provides command line tools for processing EAF files.
python -m speach eaf2csv input_elan_file.eaf -o output_file_name.csv
```

Processing media files

```python
>>> from speach import media
>>> media.convert("~/Documents/test.wav", "~/Documents/test.ogg")
>>> media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")
```

Read [Speach documentation](https://speach.readthedocs.io/) for more information.
2 changes: 1 addition & 1 deletion demo_batch_processing_eaf_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
if child_file.suffix.endswith('.eaf'):
print(child_file.name)
c = 0
eaf = elan.open_eaf(child_file)
eaf = elan.read_eaf(child_file)
for tier in eaf.roots:
if tier.type_ref == 'Utterance':
print(f" | {tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
Expand Down
2 changes: 1 addition & 1 deletion demo_elan.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from speach import elan

# read an ELAN file
eaf = elan.open_eaf('./test/data/test.eaf')
eaf = elan.read_eaf('./test/data/test.eaf')

# accessing metadata
print("Accessing EAF Metadata")
Expand Down
19 changes: 19 additions & 0 deletions demo_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pathlib import Path
from speach import media
from speach import elan


# Folder holding the demo recording (test.wav) and its transcript (test.eaf)
# NOTE(review): "~" is not expanded by pathlib here; presumably speach expands it -- confirm
ELAN_DIR = Path("~/Documents/ELAN")
SOURCE_WAV = ELAN_DIR / "test.wav"

# converting a wave file into an ogg file
media.convert(SOURCE_WAV, ELAN_DIR / "test.ogg")

# cutting audio file by timestamps:
# only an end timestamp, only a start timestamp, then both
media.cut(SOURCE_WAV, ELAN_DIR / "test_before10.ogg", to_ts="00:00:10")
media.cut(SOURCE_WAV, ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")
media.cut(SOURCE_WAV, ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")

# Cutting ELAN transcription: one output file per annotation of the tier,
# numbered from 1
eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
utterances = eaf["Person1 (Utterance)"]
for idx, ann in enumerate(utterances, start=1):
    clip_path = ELAN_DIR / f"test_person1_{idx}.ogg"
    eaf.cut(ann, clip_path)
7 changes: 6 additions & 1 deletion docs/api_elan.rst
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
.. _api_elan:

ELAN module
===========

``speach`` supports reading and manipulating multi-tier transcriptions from ELAN directly.

For common code samples for processing ELAN files, see the :ref:`tut_elan` page.

.. automodule:: speach.elan
:members: open_eaf, parse_eaf_stream
:members: read_eaf, parse_eaf_stream

.. autoclass:: ELANDoc
:members:
:member-order: groupwise
:exclude-members: read_eaf, parse_eaf_stream

.. autoclass:: ELANTier
:members:
Expand Down
21 changes: 18 additions & 3 deletions docs/elan.rst
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
.. _tut_elan:

ELAN Recipes
============

Common snippets for processing ELAN transcriptions with ``speach``.

For in-depth API reference, see :ref:`api_elan` page.

Open an ELAN file
-----------------

>>> from speach import elan
>>> eaf = elan.open_eaf('./data/test.eaf')
>>> eaf = elan.read_eaf('./data/test.eaf')
>>> eaf
<speach.elan.ELANDoc object at 0x7f67790593d0>

Expand Down Expand Up @@ -46,15 +50,26 @@ If you want to loop through the root tiers only, you can use the :code:`roots` l

.. code-block:: python
eaf = elan.open_eaf('./data/test_nested.eaf')
eaf = elan.read_eaf('./data/test_nested.eaf')
# accessing nested tiers
for tier in eaf.roots:
print(f"{tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
for child_tier in tier.children:
print(f" | {child_tier.ID} | Participant: {child_tier.participant} | Type: {child_tier.type_ref}")
for ann in child_tier.annotations:
print(f" |- {ann.ID.rjust(4, ' ')}. [{ann.from_ts} -- {ann.to_ts}] {ann.text}")
Cutting annotations to separate audio files
-------------------------------------------

Annotations can be cut and stored into separate audio files using :func:`speach.elan.ELANDoc.cut` method.

.. code-block:: python
eaf = elan.read_eaf(ELAN_DIR / "test.eaf")
for idx, ann in enumerate(eaf["Person1 (Utterance)"], start=1):
eaf.cut(ann, ELAN_DIR / f"test_person1_{idx}.ogg")
Converting ELAN files to CSV
----------------------------

Expand Down
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Speach can be used to extract annotations as well as metadata from ELAN transcri
from speach import elan
# Test ELAN reader function in speach
eaf = elan.open_eaf('./test/data/test.eaf')
eaf = elan.read_eaf('./test/data/test.eaf')
# accessing tiers & annotations
for tier in eaf:
Expand Down Expand Up @@ -68,9 +68,9 @@ More information:
Useful Links
------------

- Speach documentation: https://speach.readthedocs.io/
- Speach on PyPI: https://pypi.org/project/speach/
- Source code: https://github.com/neocl/speach/
- Speach on PyPI: https://pypi.org/project/speach/
- Speach documentation: https://speach.readthedocs.io/

Release Notes
-------------
Expand Down
32 changes: 27 additions & 5 deletions docs/media.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,35 @@ Installing ffmpeg

Sample code
-----------

Just import media module from speach library to start using it

>>> from speach import media

converting the wave file ``test.wav`` in Documents folder into OGG format ``test.ogg``

>>> media.convert("~/Documents/test.wav", "~/Documents/test.ogg")

cutting ``test.wav`` from the beginning to 00:00:10 and write output to ``test_before10.ogg``

>>> media.cut("test.wav", "test_before10.ogg", to_ts="00:00:10")

cutting ``test.wav`` from 00:00:15 to the end of the file and write output to ``test_after15.ogg``

.. code:: python
>>> media.cut("test.wav", ELAN_DIR / "test_after15.ogg", from_ts="00:00:15")

cutting ``test.wav`` from 00:00:10 to 00:00:15 and write output to ``test_10-15.ogg``

>>> media.cut(ELAN_DIR / "test.wav", ELAN_DIR / "test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15")

Querying ffmpeg information
---------------------------

from speach import media
print(media.version())
print(media.locate_ffmpeg())
media.convert("infile.mp3", "outfile.wav")
>>> from speach import media
>>> media.version()
'4.2.4-1ubuntu0.1'
>>> media.locate_ffmpeg()
'/usr/bin/ffmpeg'

Others
------
Expand Down
3 changes: 3 additions & 0 deletions docs/updates.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Updates
2021-04-30
----------

- Added :func:`speach.elan.ELANDoc.cut` function to cut annotations to separate audio files.
- Expand user home directory automatically when using :func:`speach.elan.read_eaf` function.
- Module :mod:`speach.media` supports converting media files, cutting them by timestamps, and demuxer concat.
- Package :ref:`Victoria Chua <contributors>`'s media processing code into ``speach.media`` module.

2021-04-28
Expand Down
69 changes: 65 additions & 4 deletions speach/elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# :copyright: (c) 2018 Le Tuan Anh <tuananh.ke@gmail.com>
# :license: MIT, see LICENSE for more details.


import os
import logging
from collections import OrderedDict
from collections import defaultdict as dd
Expand All @@ -20,6 +20,7 @@

from .__version__ import __issue__
from .vtt import sec2ts, ts2sec
from .media import cut


# ----------------------------------------------------------------------
Expand Down Expand Up @@ -240,6 +241,11 @@ def __init__(self, type_ref_id, participant, ID, doc=None, default_locale=None,
self.__annotations = []
self.__xml_node = xml_node

@property
def name(self):
    """ Tier name, provided as a read-only alias of the tier's ``ID`` """
    tier_id = self.ID
    return tier_id

@property
def annotations(self):
return self.__annotations
Expand Down Expand Up @@ -449,10 +455,27 @@ def __init__(self, **kwargs):
self.__constraints = []
self.__vocabs = []
self.__roots = []
self.path = None
self.__xml_root = None
self.__xml_header_node = None
self.__xml_time_order_node = None

def media_path(self):
    """ Try to determine the best path to source media file

    Candidates are tried in this order and the first existing file wins:

    1. ``relative_media_url`` exactly as stored (i.e. relative to the current working directory)
    2. ``relative_media_url`` joined with the folder that contains the EAF file (only when ``self.path`` is set)

    If neither exists, ``media_url`` is returned with a leading ``file://`` removed.
    This fallback is NOT checked for existence, callers have to verify it themselves.

    :returns: The selected media path, or None when no media URL is available at all
    """
    relative_url = self.relative_media_url
    # NOTE(review): presumably relative_media_url/media_url are None when the EAF header
    # does not declare them (confirm) -- os.path.isfile(None) would raise a TypeError
    if relative_url:
        if os.path.isfile(relative_url):
            return relative_url
        # try to join with eaf path if possible
        if self.path:
            candidate = os.path.join(os.path.dirname(self.path), relative_url)
            if os.path.isfile(candidate):
                return candidate
    # otherwise use media_url
    absolute_url = self.media_url
    if not absolute_url:
        return None
    if absolute_url.startswith("file://"):
        # drop the 7 characters of the "file://" scheme prefix
        absolute_url = absolute_url[7:]
    return absolute_url

def annotation(self, ID):
""" Get annotation by ID """
return self.__ann_map.get(ID, None)
Expand Down Expand Up @@ -625,10 +648,47 @@ def save(self, path, encoding='utf-8', xml_declaration=None,
short_empty_elements=short_empty_elements)
chio.write_file(path, _content, encoding=encoding)

def cut(self, section, outfile, media_file=None):
    """ Cut the source media with timestamps defined in section object

    For example, the following code cuts all annotations in tier "Tier 1" into appropriate audio files

    >>> for idx, ann in enumerate(eaf["Tier 1"], start=1):
    ...     eaf.cut(ann, f"tier1_ann{idx}.wav")

    :param section: Any object with ``from_ts`` and ``to_ts`` attributes which return TimeSlot objects
    :param outfile: Path to output media file, must not exist or a FileExistsError will be raised
        (NOTE(review): presumably raised by the media module's ``cut`` -- confirm)
    :param media_file: Use to specify source media file. This will override the value specified in source EAF file
    :raises ValueError: if ``section`` is None or one of its timestamps is missing
    :raises FileNotFoundError: if the source media file does not exist
    :raises FileExistsError: see ``outfile``
    """
    if section is None:
        raise ValueError("Annotation object cannot be empty")
    if not (section.from_ts and section.to_ts):
        raise ValueError("Annotation object must be time-alignable")
    # fall back to the media declared in the EAF file when no override is given
    source_media = self.media_path() if media_file is None else media_file
    # verify that the source media exists
    if not os.path.isfile(source_media):
        raise FileNotFoundError(f"Source media file ({source_media}) could not be found")
    # this is the module-level cut() function, not this method
    cut(source_media, outfile, from_ts=section.from_ts, to_ts=section.to_ts)

@classmethod
def open_eaf(cls, eaf_path, encoding='utf-8', *args, **kwargs):
def read_eaf(cls, eaf_path, encoding='utf-8', *args, **kwargs):
""" Read an EAF file
>>> from speach import elan
>>> eaf = elan.read_eaf("myfile.eaf")
:param eaf_path: Path to existing EAF file
:type eaf_path: str or Path-like object
"""
eaf_path = str(eaf_path)
if eaf_path.startswith("~"):
eaf_path = os.path.expanduser(eaf_path)
with chio.open(eaf_path, encoding=encoding, *args, **kwargs) as eaf_stream:
return cls.parse_eaf_stream(eaf_stream)
_doc = cls.parse_eaf_stream(eaf_stream)
_doc.path = eaf_path
return _doc

@classmethod
def parse_eaf_stream(cls, eaf_stream):
Expand Down Expand Up @@ -679,5 +739,6 @@ def parse_eaf_stream(cls, eaf_stream):
return _doc


open_eaf = ELANDoc.open_eaf
open_eaf = ELANDoc.read_eaf
read_eaf = ELANDoc.read_eaf
parse_eaf_stream = ELANDoc.parse_eaf_stream

0 comments on commit 62f64a8

Please sign in to comment.