Merge pull request #15 from letuananh/dev

rename ttlig to tig
neocl · May 2, 2021 · 628dbe0 · 628dbe0
2 parents 8a73733 + 2dc41dd
commit 628dbe0
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 67 deletions.
diff --git a/speach/__init__.py b/speach/__init__.py
@@ -1,9 +1,9 @@
 # -*- coding: utf-8 -*-
 
-'''
+"""
 speach - a Python library for managing, annotating, and converting natural language corpuses
 using popular formats (CoNLL, ELAN, Praat, CSV, JSON, SQLite, VTT, Audacity, TTL, TIG, ISF)
-'''
+"""
 
 # This code is a part of speach library: https://github.com/neocl/speach/
 # :copyright: (c) 2018 Le Tuan Anh <tuananh.ke@gmail.com>

diff --git a/speach/__main__.py b/speach/__main__.py
@@ -16,7 +16,7 @@
 from chirptext import chio
 from chirptext.cli import CLIApp, setup_logging
 
-from speach import ttl, TTLSQLite, ttlig, orgmode
+from speach import ttl, TTLSQLite, tig, orgmode
 from speach.elan import parse_eaf_stream
 
 # ----------------------------------------------------------------------
@@ -47,7 +47,7 @@ def get_doc_length(name, ctx):
 
 
 def make_db(cli, args):
-    ''' Convert TTL-TXT to TTL-SQLite '''
+    """ Convert TTL-TXT to TTL-SQLite """
     print("Reading document ...")
     ttl_doc = ttl.Document.read_ttl(args.ttl)
     print("Sentences: {}".format(len(ttl_doc)))
@@ -78,7 +78,7 @@ def process_tig(cli, args):
         sc = 0
         ttl_writer = ttl.TxtWriter.from_path(args.output) if args.output else None
         with chio.open(args.ttlig) as infile:
-            for sent in ttlig.read_stream_iter(infile):
+            for sent in tig.read_stream_iter(infile):
                 sc += 1
                 if ttl_writer is not None:
                     ttl_sent = sent.to_ttl()
@@ -94,7 +94,7 @@ def process_tig(cli, args):
         output.print()
         output.print()
         with chio.open(args.ttlig) as infile:
-            for idx, sent in enumerate(ttlig.read_stream_iter(infile)):
+            for idx, sent in enumerate(tig.read_stream_iter(infile)):
                 sc += 1
                 output.print(sent.to_expex(default_ident=idx + 1))
                 output.print()
@@ -105,7 +105,7 @@ def process_tig(cli, args):
 
 
 def jp_line_proc(line, iglines):
-    igrow = ttlig.text_to_igrow(line.replace('\u3000', ' ').strip())
+    igrow = tig.text_to_igrow(line.replace('\u3000', ' ').strip())
     iglines.append(igrow.text)
     iglines.append(igrow.tokens)
     iglines.append("")
@@ -153,7 +153,7 @@ def make_text(sent, delimiter=' '):
         for tk in sent:
             furi = tk.find('furi', default=None)
             if furi:
-                frags.append(ttlig.make_ruby_html(furi.label))
+                frags.append(tig.make_ruby_html(furi.label))
             else:
                 frags.append(tk.text)
     html_text = delimiter.join(frags) if frags else sent.text

diff --git a/speach/ttlig.py → speach/tig.py b/speach/ttlig.py → speach/tig.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-'''
+"""
 TTL Interlinear Gloss (TIG) format support
 
 More information: https://en.wikipedia.org/wiki/Interlinear_gloss
@@ -11,7 +11,7 @@
     a morphophonemic transliteration,
     a word-by-word or morpheme-by-morpheme gloss, where morphemes within a word are separated by hyphens or other punctuation,
     a free translation, which may be placed in a separate paragraph or on the facing page if the structures of the languages are too different for it to follow the text line by line.
-'''
+"""
 # This code is a part of speach library: https://github.com/neocl/speach/
 # :copyright: (c) 2018 Le Tuan Anh <tuananh.ke@gmail.com>
 # :license: MIT, see LICENSE for more details.
@@ -29,15 +29,6 @@
 from chirptext import ttl
 
 
-
-# ----------------------------------------------------------------------
-# Configuration
-# ----------------------------------------------------------------------
-
-def getLogger():
-    return logging.getLogger(__name__)
-
-
 # ----------------------------------------------------------------------
 # Models
 # ----------------------------------------------------------------------
@@ -48,8 +39,10 @@ def getLogger():
 #     a conventional transliteration into the Latin alphabet,
 #     a phonetic transcription,
 #     a morphophonemic transliteration,
-#     a word-by-word or morpheme-by-morpheme gloss, where morphemes within a word are separated by hyphens or other punctuation,
-#     a free translation, which may be placed in a separate paragraph or on the facing page if the structures of the languages are too different for it to follow the text line by line.
+#     a word-by-word or morpheme-by-morpheme gloss, where morphemes within
+#     a word are separated by hyphens or other punctuation,
+#     a free translation, which may be placed in a separate paragraph or on the facing page
+#     if the structures of the languages are too different for it to follow the text line by line.
 class IGRow(DataObject):
     def __init__(self, text='', transliteration='', transcription='', morphtrans='', morphgloss='', wordgloss='', translation='', **kwargs):
         """
@@ -79,35 +72,35 @@ def to_ttl(self):
             if self.morphtrans:
                 _morphtokens = tokenize(self.morphtrans)
                 if len(_morphtokens) != len(ttl_sent):
-                    getLogger().warning("Morphophonemic transliteration line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
+                    logging.getLogger(__name__).warning("Morphophonemic transliteration line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
                 else:
                     for t, m in zip(ttl_sent, _morphtokens):
                         t.new_tag(m, tagtype='mtrans')
             if self.pos:
                 _postokens = tokenize(self.pos)
                 if len(_postokens) != len(ttl_sent):
-                    getLogger().warning("Part-of-speech line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
+                    logging.getLogger(__name__).warning("Part-of-speech line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
                 else:
                     for t, m in zip(ttl_sent, _postokens):
                         t.pos = m
             if self.lemma:
                 _lemmas = tokenize(self.lemma)
                 if len(_lemmas) != len(ttl_sent):
-                    getLogger().warning("Lemma line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
+                    logging.getLogger(__name__).warning("Lemma line and tokens line are mismatched for sentence: {}".format(self.ident or self.ID or self.Id or self.id or self.text))
                 else:
                     for t, m in zip(ttl_sent, _lemmas):
                         t.lemma = m
             if self.morphgloss:
                 _glosstokens = tokenize(self.morphgloss)
                 if len(_glosstokens) != len(ttl_sent):
-                    getLogger().warning("morpheme-by-morpheme gloss and tokens lines are mismatched for sentence {}".format(self.ident or self.ID or self.Id or self.id or self.text))
+                    logging.getLogger(__name__).warning("morpheme-by-morpheme gloss and tokens lines are mismatched for sentence {}".format(self.ident or self.ID or self.Id or self.id or self.text))
                 else:
                     for t, m in zip(ttl_sent, _glosstokens):
                         t.new_tag(m, tagtype='mgloss')
             if self.wordgloss:
                 _glosstokens = tokenize(self.wordgloss)
                 if len(_glosstokens) != len(ttl_sent):
-                    getLogger().warning("word-by-word gloss and tokens lines are mismatched for sentence {}".format(self.ident or self.ID or self.Id or self.id or self.text))
+                    logging.getLogger(__name__).warning("word-by-word gloss and tokens lines are mismatched for sentence {}".format(self.ident or self.ID or self.Id or self.id or self.text))
                 else:
                     for t, m in zip(ttl_sent, _glosstokens):
                         t.new_tag(m, tagtype='wgloss')
@@ -132,12 +125,12 @@ def to_expex(self, default_ident=''):
             if tags:
                 lengths.append(make_expex_gloss(self.concept, glosses, tags.pop(0)))
             else:
-                getLogger().warning("There are too many gloss lines in sentence {}. {}".format(sent_ident, self.text))
+                logging.getLogger(__name__).warning("There are too many gloss lines in sentence {}. {}".format(sent_ident, self.text))
         # ensure that number of tokens are the same
         if len(lengths) > 1:
             for line_len in lengths[1:]:
                 if line_len != lengths[0]:
-                    getLogger().warning("Inconsistent tokens and morphgloss for sentence {}. {} ({} v.s {})".format(sent_ident, self.text, line_len, lengths[0]))
+                    logging.getLogger(__name__).warning("Inconsistent tokens and morphgloss for sentence {}. {} ({} v.s {})".format(sent_ident, self.text, line_len, lengths[0]))
                     break
         lines.extend(glosses)
         lines.append('\\glft \lit{{{}}}//'.format(escape_latex(self.text)))
@@ -259,7 +252,7 @@ def _parse_row(self, line_list, line_tags):
                 _tag = line[:tag_idx].strip()
                 _val = line[tag_idx + 1:].lstrip().rstrip('\r\n')
                 if _tag.lower() not in TTLIG.KNOWN_LABELS:
-                    getLogger().warning("Unknown tag was used ({}): {}".format(_tag, _val))
+                    logging.getLogger(__name__).warning("Unknown tag was used ({}): {}".format(_tag, _val))
                 line_dict[_tag] = _val
             return IGRow(**line_dict)
         else:
@@ -272,7 +265,7 @@ def read_iter(self, stream):
         line_tags = self.row_format()
         for tag in line_tags:
             if tag.lower() not in TTLIG.KNOWN_LABELS + TTLIG.SPECIAL_LABELS:
-                getLogger().warning("Unknown label in header: {}".format(tag))
+                logging.getLogger(__name__).warning("Unknown label in header: {}".format(tag))
         for row in IGStreamReader._iter_stream(stream):
             yield self._parse_row(row, line_tags)
 
@@ -297,7 +290,7 @@ def _read_header(ig_stream):
                 key = m.group('key').strip()
                 value = m.group('value')
                 if key in meta:
-                    getLogger().warning("Key {} is duplicated in the header".format(key))
+                    logging.getLogger(__name__).warning("Key {} is duplicated in the header".format(key))
                 meta[key] = value
             else:
                 # this line is weird
@@ -497,7 +490,7 @@ def parse(self, text):
                 if not chars.peep():
                     raise ValueError("Escape char ({}) cannot be the last character".format(self.escapechar))
                 elif chars.peep() and chars.peep().value not in (self.escapechar, self.delimiter):
-                    getLogger().warning("Escape char ({}) should not be used for normal character ({}). This can be a potential bug in the data.".format(self.escapechar, chars.peep().value))
+                    logging.getLogger(__name__).warning("Escape char ({}) should not be used for normal character ({}). This can be a potential bug in the data.".format(self.escapechar, chars.peep().value))
                 is_escaping = True
             elif c == self.delimiter:
                 # flush