Merge pull request #24 from letuananh/main

Fix tier participant code editing bug
neocl · May 21, 2021 · 5f89ce6 · 5f89ce6
2 parents 79f34ce + bbee548
commit 5f89ce6
Show file tree

Hide file tree

Showing 12 changed files with 439 additions and 57 deletions.
diff --git a/README.md b/README.md
@@ -4,15 +4,15 @@
 [![Total alerts](https://img.shields.io/lgtm/alerts/g/neocl/speach.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/neocl/speach/alerts/)
 [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/neocl/speach.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/neocl/speach/context:python)
 
-Speach (formerly [texttaglib](https://github.com/letuananh/texttaglib/)), is a Python 3 library for managing, annotating, and converting natural language corpuses using popular formats (CoNLL, ELAN, Praat, CSV, JSON, SQLite, VTT, Audacity, TTL, TIG, ISF, etc.)
+Speach (formerly [texttaglib](https://github.com/letuananh/texttaglib/)) is a Python 3 library for managing, annotating, and converting natural language corpora using popular formats (CoNLL, ELAN, Praat, CSV, JSON, SQLite, VTT, Audacity, TTL, TTLIG, ISF, etc.)
 
 Main functions are:
 
-- Text corpus management
-- Manipulating [ELAN](https://archive.mpi.nl/tla/elan/download>) transcription files directly in ELAN Annotation Format (eaf)
-- TIG - A human-friendly intelinear gloss format for linguistic documentation
-- Multiple storage formats (text, CSV, JSON, SQLite databases)
+- Reading, editing, and writing ELAN transcriptions and related media files directly in [ELAN Annotation Format](https://archive.mpi.nl/tla/elan/download) (eaf)
 - Cutting, converting, and merging audio/video files
+- TTLIG (or TIG) - A human-friendly linguistic documentation format with interlinear gloss support
+- Text corpus management using texttaglib format
+- Multiple storage formats (text, CSV, JSON, SQLite databases)
 
 ## Useful Links
 
@@ -60,3 +60,10 @@ Processing media files
 ```
 
 Read [Speach documentation](https://speach.readthedocs.io/) for more information.
+
+## Contributors
+
+- [Le Tuan Anh](https://github.com/letuananh) (Maintainer)
+- [Victoria Chua](https://github.com/vicchuayh)
+
+Contributors are welcome! If you want to help develop speach, please visit the [Contributing](https://speach.readthedocs.io/en/latest/contributing.html) page.
diff --git a/cov.sh b/cov.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-python3 -m coverage run --source texttaglib --branch -m unittest discover -s test
+python3 -m coverage run --source speach --branch -m unittest discover -s test
 python3 -m coverage html
 
diff --git a/demo_elan.py b/demo_elan.py
@@ -1,7 +1,7 @@
 from speach import elan
 
 # read an ELAN file
-eaf = elan.read_eaf('./test/data/test.eaf')
+eaf = elan.read_eaf('./test_data/fables_01_03_aesop_64kb.eaf')
 
 # accessing metadata
 print("Accessing EAF Metadata")
@@ -13,7 +13,7 @@
 print(f"Media relative URL: {eaf.relative_media_url}")
 
 # loop through all tiers in this eaf file
-print("\nLoop through all tiers")
+print("\nBasic ELAN demo: looping through all tiers and their annotations")
 print("-" * 60)
 for tier in eaf:
     print(f"{tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")
@@ -22,7 +22,7 @@
         print(f"{ann.ID.rjust(4, ' ')}. [{ann.from_ts} :: {ann.to_ts}] {ann.text}")
 
 # loop through the root tiers only (i.e. ignoring dependent tiers)
-print("\nLoop through root tiers only")
+print("\n\nDemo nested ELAN file: loop through root tiers only")
 print("-" * 60)
 for tier in eaf.roots:
     print(f"[+]-- {tier.ID} | Participant: {tier.participant} | Type: {tier.type_ref}")

diff --git a/docs/index.rst b/docs/index.rst
@@ -16,7 +16,7 @@ Main functions:
 - TIG - A human-friendly interlinear gloss format for linguistic documentation
 - Multiple storage formats (text files, JSON files, SQLite databases)
 
-:ref:`Contributors <contributors>` are welcome!.
+:ref:`Contributors <contributors>` are welcome!
 If you want to help develop ``speach``, please visit the :ref:`contributing` page.
 
 Installation

diff --git a/speach/__init__.py b/speach/__init__.py
@@ -15,8 +15,9 @@
 from .__version__ import __version_major__, __version_long__, __version__, __status__
 
 from chirptext import ttl
+from . import ttlig as tig  # expose ttlig as tig
 from .sqlite import TTLSQLite
 
 
-__all__ = ['ttl', 'TTLSQLite',
+__all__ = ['ttl', 'TTLSQLite', 'tig',
            "__version__", "__author__", "__description__", "__copyright__"]
diff --git a/speach/__main__.py b/speach/__main__.py
@@ -16,7 +16,7 @@
 from chirptext import chio
 from chirptext.cli import CLIApp, setup_logging
 
-from speach import ttl, TTLSQLite, tig, orgmode
+from speach import ttl, TTLSQLite, ttlig, orgmode
 from speach.elan import parse_eaf_stream
 
 # ----------------------------------------------------------------------
@@ -78,7 +78,7 @@ def process_tig(cli, args):
         sc = 0
         ttl_writer = ttl.TxtWriter.from_path(args.output) if args.output else None
         with chio.open(args.ttlig) as infile:
-            for sent in tig.read_stream_iter(infile):
+            for sent in ttlig.read_stream_iter(infile):
                 sc += 1
                 if ttl_writer is not None:
                     ttl_sent = sent.to_ttl()
@@ -94,7 +94,7 @@ def process_tig(cli, args):
         output.print()
         output.print()
         with chio.open(args.ttlig) as infile:
-            for idx, sent in enumerate(tig.read_stream_iter(infile)):
+            for idx, sent in enumerate(ttlig.read_stream_iter(infile)):
                 sc += 1
                 output.print(sent.to_expex(default_ident=idx + 1))
                 output.print()
@@ -105,7 +105,7 @@ def process_tig(cli, args):
 
 
 def jp_line_proc(line, iglines):
-    igrow = tig.text_to_igrow(line.replace('\u3000', ' ').strip())
+    igrow = ttlig.text_to_igrow(line.replace('\u3000', ' ').strip())
     iglines.append(igrow.text)
     iglines.append(igrow.tokens)
     iglines.append("")
@@ -153,7 +153,7 @@ def make_text(sent, delimiter=' '):
         for tk in sent:
             furi = tk.find('furi', default=None)
             if furi:
-                frags.append(tig.make_ruby_html(furi.label))
+                frags.append(ttlig.make_ruby_html(furi.label))
             else:
                 frags.append(tk.text)
     html_text = delimiter.join(frags) if frags else sent.text

diff --git a/speach/__version__.py b/speach/__version__.py
@@ -14,6 +14,6 @@
 __issue__ = "https://github.com/neocl/speach/issues/"
 __maintainer__ = "Le Tuan Anh"
 __version_major__ = "0.1"  # follow PEP-0440
-__version__ = "{}a6".format(__version_major__)
-__version_long__ = "{} - Alpha 6".format(__version_major__)
+__version__ = "{}a7".format(__version_major__)
+__version_long__ = "{} - Alpha 7".format(__version_major__)
 __status__ = "Prototype"
diff --git a/speach/elan.py b/speach/elan.py
@@ -174,6 +174,7 @@ def __str__(self):
 class TimeAnnotation(Annotation):
     """ An ELAN time-alignable annotation
     """
+
     def __init__(self, ID, from_ts, to_ts, value, xml_node=None, **kwargs):
         super().__init__(ID, value, xml_node=xml_node, **kwargs)
         self.__from_ts = from_ts
@@ -344,8 +345,11 @@ def participant(self):
 
     @participant.setter
     def participant(self, value):
-        if self.__xml_node:
+        if self.__xml_node is not None:
             self.__xml_node.set('PARTICIPANT', value)
+        else:
+            logging.getLogger(__name__).warning(
+                f"Could not update participant, DOM node is missing for tier {self.name}")
         self.__participant = value
 
     @property
@@ -469,7 +473,6 @@ def _add_annotation_xml(self, annotation_node) -> Annotation:
 
 
 class CVEntry(DataObject):
-
     """ A controlled vocabulary entry """
 
     def __init__(self, xml_node=None, **kwargs):
@@ -508,6 +511,7 @@ def __str__(self):
 
 class ControlledVocab(DataObject):
     """ ELAN Controlled Vocabulary """
+
     def __init__(self, xml_node=None, **kwargs):
         super().__init__(**kwargs)
         self.__entries = []
@@ -655,6 +659,7 @@ class ExternalRef(DataObject):
 
     <EXTERNAL_REF EXT_REF_ID="er1" TYPE="ecv" VALUE="file:/home/tuananh/Documents/ELAN/fables_cv.ecv"/>
     """
+
     def __init__(self, xml_node=None, **kwargs):
         super().__init__(**kwargs)
         self.__xml_node = xml_node
@@ -689,11 +694,10 @@ def __str__(self):
 
     @classmethod
     def from_xml(cls, xml_node, **kwargs):
-        return ExternalRef(xml_node=xml_node, **kwargs)    
+        return ExternalRef(xml_node=xml_node, **kwargs)
 
 
 class Doc(DataObject):
-
     """ This class represents an ELAN file (\*.eaf)
     """
 
@@ -1031,7 +1035,8 @@ def parse_eaf_stream(cls, eaf_stream, *args, **kwargs):
             elif elem.tag == 'LANGUAGE':
                 _doc._add_language_xml(elem)
             else:
-                logging.getLogger(__name__).warning(f"Unknown element type -- {elem.tag}. Please consider to report an issue at {__issue__}")
+                logging.getLogger(__name__).warning(
+                    f"Unknown element type -- {elem.tag}. Please consider to report an issue at {__issue__}")
         # linking parts together
         # linguistic_types -> vocabs
         for lingtype in _doc.linguistic_types:

diff --git a/speach/tig.py → speach/ttlig.py b/speach/tig.py → speach/ttlig.py