Skip to content

Commit

Permalink
Merge pull request #24 from letuananh/main
Browse files Browse the repository at this point in the history
Fix tier participant code editing bug
  • Loading branch information
letuananh committed May 21, 2021
2 parents 79f34ce + bbee548 commit 5f89ce6
Show file tree
Hide file tree
Showing 12 changed files with 439 additions and 57 deletions.
17 changes: 12 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
[![Total alerts](https://img.shields.io/lgtm/alerts/g/neocl/speach.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/neocl/speach/alerts/)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/neocl/speach.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/neocl/speach/context:python)

Speach (formerly [texttaglib](https://github.com/letuananh/texttaglib/)), is a Python 3 library for managing, annotating, and converting natural language corpuses using popular formats (CoNLL, ELAN, Praat, CSV, JSON, SQLite, VTT, Audacity, TTL, TIG, ISF, etc.)
Speach (formerly [texttaglib](https://github.com/letuananh/texttaglib/)) is a Python 3 library for managing, annotating, and converting natural language corpuses using popular formats (CoNLL, ELAN, Praat, CSV, JSON, SQLite, VTT, Audacity, TTL, TTLIG, ISF, etc.)

Main functions are:

- Text corpus management
- Manipulating [ELAN](https://archive.mpi.nl/tla/elan/download>) transcription files directly in ELAN Annotation Format (eaf)
- TIG - A human-friendly intelinear gloss format for linguistic documentation
- Multiple storage formats (text, CSV, JSON, SQLite databases)
- Reading, editing, and writing ELAN transcriptions and related media files directly in [ELAN Annotation Format](https://archive.mpi.nl/tla/elan/download) (eaf)
- Cutting, converting, and merging audio/video files
- TTLIG (or TIG) - A human-friendly linguistic documentation format with interlinear gloss support
- Text corpus management using texttaglib format
- Multiple storage formats (text, CSV, JSON, SQLite databases)

## Useful Links

Expand Down Expand Up @@ -60,3 +60,10 @@ Processing media files
```

Read [Speach documentation](https://speach.readthedocs.io/) for more information.

## Contributors

- [Le Tuan Anh](https://github.com/letuananh) (Maintainer)
- [Victoria Chua](https://github.com/vicchuayh)

Contributors are welcome! If you want to help develop speach, please visit the [Contributing](https://speach.readthedocs.io/en/latest/contributing.html) page.
2 changes: 1 addition & 1 deletion cov.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash

python3 -m coverage run --source texttaglib --branch -m unittest discover -s test
python3 -m coverage run --source speach --branch -m unittest discover -s test
python3 -m coverage html

6 changes: 3 additions & 3 deletions demo_elan.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from speach import elan

# read an ELAN file
eaf = elan.read_eaf('./test/data/test.eaf')
eaf = elan.read_eaf('./test_data/fables_01_03_aesop_64kb.eaf')

# accessing metadata
print("Accessing EAF Metadata")
Expand All @@ -13,7 +13,7 @@
print(f"Media relative URL: {eaf.relative_media_url}")

# loop through all tiers in this eaf file
print("\nLoop through all tiers")
print("\nBasic ELAN demo: looping through all tiers and their annotations")
print("-" * 60)
for tier in eaf:
print(f"{tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
Expand All @@ -22,7 +22,7 @@
print(f"{ann.ID.rjust(4, ' ')}. [{ann.from_ts} :: {ann.to_ts}] {ann.text}")

# loop through the root tiers only (i.e. ignoring dependent tiers)
print("\nLoop through root tiers only")
print("\n\nDemo nested ELAN file: loop through root tiers only")
print("-" * 60)
for tier in eaf.roots:
print(f"[+]-- {tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Main functions:
- TIG - A human-friendly interlinear gloss format for linguistic documentation
- Multiple storage formats (text files, JSON files, SQLite databases)

:ref:`Contributors <contributors>` are welcome!.
:ref:`Contributors <contributors>` are welcome!
If you want to help develop ``speach``, please visit the :ref:`contributing` page.

Installation
Expand Down
3 changes: 2 additions & 1 deletion speach/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from .__version__ import __version_major__, __version_long__, __version__, __status__

from chirptext import ttl
from . import ttlig as tig # expose ttlig as tig
from .sqlite import TTLSQLite


__all__ = ['ttl', 'TTLSQLite',
__all__ = ['ttl', 'TTLSQLite', 'tig',
"__version__", "__author__", "__description__", "__copyright__"]
10 changes: 5 additions & 5 deletions speach/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from chirptext import chio
from chirptext.cli import CLIApp, setup_logging

from speach import ttl, TTLSQLite, tig, orgmode
from speach import ttl, TTLSQLite, ttlig, orgmode
from speach.elan import parse_eaf_stream

# ----------------------------------------------------------------------
Expand Down Expand Up @@ -78,7 +78,7 @@ def process_tig(cli, args):
sc = 0
ttl_writer = ttl.TxtWriter.from_path(args.output) if args.output else None
with chio.open(args.ttlig) as infile:
for sent in tig.read_stream_iter(infile):
for sent in ttlig.read_stream_iter(infile):
sc += 1
if ttl_writer is not None:
ttl_sent = sent.to_ttl()
Expand All @@ -94,7 +94,7 @@ def process_tig(cli, args):
output.print()
output.print()
with chio.open(args.ttlig) as infile:
for idx, sent in enumerate(tig.read_stream_iter(infile)):
for idx, sent in enumerate(ttlig.read_stream_iter(infile)):
sc += 1
output.print(sent.to_expex(default_ident=idx + 1))
output.print()
Expand All @@ -105,7 +105,7 @@ def process_tig(cli, args):


def jp_line_proc(line, iglines):
igrow = tig.text_to_igrow(line.replace('\u3000', ' ').strip())
igrow = ttlig.text_to_igrow(line.replace('\u3000', ' ').strip())
iglines.append(igrow.text)
iglines.append(igrow.tokens)
iglines.append("")
Expand Down Expand Up @@ -153,7 +153,7 @@ def make_text(sent, delimiter=' '):
for tk in sent:
furi = tk.find('furi', default=None)
if furi:
frags.append(tig.make_ruby_html(furi.label))
frags.append(ttlig.make_ruby_html(furi.label))
else:
frags.append(tk.text)
html_text = delimiter.join(frags) if frags else sent.text
Expand Down
4 changes: 2 additions & 2 deletions speach/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
__issue__ = "https://github.com/neocl/speach/issues/"
__maintainer__ = "Le Tuan Anh"
__version_major__ = "0.1" # follow PEP-0440
__version__ = "{}a6".format(__version_major__)
__version_long__ = "{} - Alpha 6".format(__version_major__)
__version__ = "{}a7".format(__version_major__)
__version_long__ = "{} - Alpha 7".format(__version_major__)
__status__ = "Prototype"
15 changes: 10 additions & 5 deletions speach/elan.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def __str__(self):
class TimeAnnotation(Annotation):
""" An ELAN time-alignable annotation
"""

def __init__(self, ID, from_ts, to_ts, value, xml_node=None, **kwargs):
super().__init__(ID, value, xml_node=xml_node, **kwargs)
self.__from_ts = from_ts
Expand Down Expand Up @@ -344,8 +345,11 @@ def participant(self):

@participant.setter
def participant(self, value):
if self.__xml_node:
if self.__xml_node is not None:
self.__xml_node.set('PARTICIPANT', value)
else:
logging.getLogger(__name__).warning(
f"Could not update participant, DOM node is missing for tier {self.name}")
self.__participant = value

@property
Expand Down Expand Up @@ -469,7 +473,6 @@ def _add_annotation_xml(self, annotation_node) -> Annotation:


class CVEntry(DataObject):

""" A controlled vocabulary entry """

def __init__(self, xml_node=None, **kwargs):
Expand Down Expand Up @@ -508,6 +511,7 @@ def __str__(self):

class ControlledVocab(DataObject):
""" ELAN Controlled Vocabulary """

def __init__(self, xml_node=None, **kwargs):
super().__init__(**kwargs)
self.__entries = []
Expand Down Expand Up @@ -655,6 +659,7 @@ class ExternalRef(DataObject):
<EXTERNAL_REF EXT_REF_ID="er1" TYPE="ecv" VALUE="file:/home/tuananh/Documents/ELAN/fables_cv.ecv"/>
"""

def __init__(self, xml_node=None, **kwargs):
super().__init__(**kwargs)
self.__xml_node = xml_node
Expand Down Expand Up @@ -689,11 +694,10 @@ def __str__(self):

@classmethod
def from_xml(cls, xml_node, **kwargs):
return ExternalRef(xml_node=xml_node, **kwargs)
return ExternalRef(xml_node=xml_node, **kwargs)


class Doc(DataObject):

""" This class represents an ELAN file (\*.eaf)
"""

Expand Down Expand Up @@ -1031,7 +1035,8 @@ def parse_eaf_stream(cls, eaf_stream, *args, **kwargs):
elif elem.tag == 'LANGUAGE':
_doc._add_language_xml(elem)
else:
logging.getLogger(__name__).warning(f"Unknown element type -- {elem.tag}. Please consider to report an issue at {__issue__}")
logging.getLogger(__name__).warning(
f"Unknown element type -- {elem.tag}. Please consider to report an issue at {__issue__}")
# linking parts together
# linguistic_types -> vocabs
for lingtype in _doc.linguistic_types:
Expand Down
File renamed without changes.

0 comments on commit 5f89ce6

Please sign in to comment.