Integrate md2po-po2md tests (#195)

* Integrate md2po-po2md tests
* Fix error escaping characters inside image titles
* Drop unneeded `__slots__` from classes
mondeja · Nov 15, 2021 · c882b08
1 parent e51a04f
commit c882b08
Show file tree

Hide file tree

Showing 38 changed files with 190 additions and 213 deletions.
diff --git a/mdpo/md.py b/mdpo/md.py
@@ -1,6 +1,7 @@
 """Markdown related utilities for mdpo."""
 
 import md4c
+import polib
 
 from mdpo.po import po_escaped_string
 from mdpo.text import min_not_max_chars_in_a_row
@@ -11,46 +12,6 @@
 )
 
 
-def escape_links_titles(text, link_start_string='[', link_end_string=']'):
-    r"""Escapes ``"`` characters found inside link titles.
-
-    This is used by mdpo extracting titles of links which contains Markdown
-    `link titles <https://spec.commonmark.org/0.29/#link-title>`_ delimiter
-    characters.
-
-    Args:
-        text (str): Text where the links titles to escape will be searched.
-        link_start_string (str): String that delimites the start of a link.
-        link_end_string (str): String that delimites the end of a link.
-
-    Returns:
-        str: Same text as input with escaped title delimiters characters found
-        inside titles.
-
-    Examples:
-        >>> title = '[a link](href "title with characters to escape "")'
-        >>> escape_links_titles(title)
-        '[a link](href "title with characters to escape \\"")'
-    """
-    import re
-
-    link_end_string_escaped_regex = re.escape(link_end_string)
-    regex = re.compile(
-        r'({}[^{}]+{}\([^\s]+\s)([^\)]+)'.format(
-            re.escape(link_start_string),
-            link_end_string_escaped_regex,
-            link_end_string_escaped_regex,
-        ),
-    )
-
-    for match in re.finditer(regex, text):
-        original_string = match.group(0)
-        escaped_title = match.group(2)[1:-1].replace('"', '\\"')
-        target_string = f'{match.group(1)}"{escaped_title}"'
-        text = text.replace(original_string, target_string)
-    return text
-
-
 def parse_link_references(content):
     """Parses link references found in a Markdown content.
 
@@ -94,8 +55,6 @@ class MarkdownSpanWrapper:
         'code_start_string_escaped',
         'code_end_string',
         'code_end_string_escaped',
-        'link_start_string',
-        'link_end_string',
         'wikilink_start_string',
         'wikilink_end_string',
 
@@ -132,8 +91,6 @@ def __init__(
         self.italic_end_string = kwargs.get('italic_end_string', '*')
         self.code_start_string = kwargs.get('code_start_string', '`')[0]
         self.code_end_string = kwargs.get('code_end_string', '`')[0]
-        self.link_start_string = kwargs.get('link_start_string', '[')
-        self.link_end_string = kwargs.get('link_end_string', ']')
         self.wikilink_start_string = kwargs.get('wikilink_start_string', '[[')
         self.wikilink_end_string = kwargs.get('wikilink_end_string', ']]')
 
@@ -176,7 +133,7 @@ def enter_span(self, span, details):
             self._inside_codespan = True
             self._current_line += self.code_start_string
         elif span is md4c.SpanType.A:
-            self._current_line += self.link_start_string
+            self._current_line += '['
             self._current_aspan_href = details['href'][0][1]
             self._current_aspan_title = (
                 details['title'][0][1] if details['title'] else None
@@ -200,7 +157,7 @@ def leave_span(self, span, details):
                 self._current_line += f']({self._current_aspan_href}'
                 if self._current_aspan_title:
                     self._current_line += (
-                        f' "{escape_links_titles(self._current_aspan_title)}"'
+                        f' "{polib.escape(self._current_aspan_title)}"'
                     )
                 self._current_line += ')'
             self._current_aspan_href = False
@@ -218,7 +175,7 @@ def leave_span(self, span, details):
             self._current_line += f']({src}'
             if details['title']:
                 title = details['title'][0][1]
-                self._current_line += f' "{escape_links_titles(title)}"'
+                self._current_line += f' "{polib.escape(title)}"'
             self._current_line += ')'
 
     def text(self, block, text):

diff --git a/mdpo/md2po/__init__.py b/mdpo/md2po/__init__.py
@@ -69,9 +69,7 @@ class Md2Po:
         '_leavespan_replacer',
 
         'bold_start_string',
-        'bold_start_string_escaped',
         'bold_end_string',
-        'bold_end_string_escaped',
         'italic_start_string',
         'italic_start_string_escaped',
         'italic_end_string',
@@ -185,14 +183,7 @@ def __init__(self, files_or_content, **kwargs):
 
         if not self.plaintext:
             self.bold_start_string = kwargs.get('bold_start_string', '**')
-            self.bold_start_string_escaped = po_escaped_string(
-                self.bold_start_string,
-            )
-
             self.bold_end_string = kwargs.get('bold_end_string', '**')
-            self.bold_end_string_escaped = po_escaped_string(
-                self.bold_end_string,
-            )
 
             self.italic_start_string = kwargs.get('italic_start_string', '*')
             self.italic_start_string_escaped = po_escaped_string(
@@ -769,10 +760,10 @@ def not_plaintext_leave_span(self, span, details):
                     # autolink vs link clash (see implementation notes)
                     self._current_msgid += f'<{self._current_aspan_text}'
                     if title:
-                        self._current_msgid += f' "{title}"'
+                        self._current_msgid += f' "{polib.escape(title)}"'
                     self._current_msgid += '>'
                 else:
-                    title_part = f' "{title}"' if title else ''
+                    title_part = f' "{polib.escape(title)}"' if title else ''
                     href = details['href'][0][1]
                     self._current_msgid += (
                         f'[{self._current_aspan_text}]({href}{title_part})'
@@ -815,7 +806,7 @@ def not_plaintext_leave_span(self, span, details):
             else:
                 img_markup += f'({imgspan_src}'
                 if imgspan_title:
-                    img_markup += f' "{imgspan_title}"'
+                    img_markup += f' "{polib.escape(imgspan_title)}"'
                 img_markup += ')'
 
             self._current_imgspan = {}

diff --git a/mdpo/md2po/__main__.py b/mdpo/md2po/__main__.py
@@ -160,13 +160,17 @@ def parse_options(args=[]):
         sys.exit(1)
     opts, unknown = parser.parse_known_args(args)
 
+    files_or_content = ''
     if not sys.stdin.isatty():
-        files_or_content = sys.stdin.read().strip('\n')
+        files_or_content += sys.stdin.read().strip('\n')
     if isinstance(opts.files_or_content, list) and opts.files_or_content:
         if len(opts.files_or_content) == 1:
-            files_or_content = opts.files_or_content[0]
+            files_or_content += opts.files_or_content[0]
         else:
             files_or_content = opts.files_or_content
+    if not files_or_content:
+        sys.stderr.write('Files or content to extract not specified\n')
+        sys.exit(1)
     opts.files_or_content = files_or_content
 
     if opts.extensions is None:

diff --git a/mdpo/md2po2md/__main__.py b/mdpo/md2po2md/__main__.py
@@ -43,7 +43,7 @@ def build_parser():
         '-l', '--lang', dest='langs', default=[], action='append',
         help='Language codes used to create the output directories.'
              ' This argument can be passed multiple times.',
-        metavar='LANG',
+        metavar='LANG', required=True,
     )
 
     output_paths_schema_help = '' if SPHINX_IS_RUNNING else (
@@ -56,8 +56,7 @@ def build_parser():
         ' example.'
     )
     parser.add_argument(
-        '-o', '--output', dest='output_paths_schema',
-        required=True, type=str,
+        '-o', '--output', dest='output_paths_schema', required=True, type=str,
         help='Path schema for outputs, built using placeholders. There is a'
              ' mandatory placeholder for languages: {lang};and one optional'
              f' for output basename: {{basename}}.{output_paths_schema_help}'
@@ -96,6 +95,9 @@ def parse_options(args=[]):
         input_paths_glob += sys.stdin.read().strip('\n')
     if isinstance(opts.input_paths_glob, list) and opts.input_paths_glob:
         input_paths_glob += opts.input_paths_glob[0]
+    if not input_paths_glob:
+        sys.stderr.write('Files or content to translate not specified\n')
+        sys.exit(1)
     opts.input_paths_glob = input_paths_glob
 
     if opts.extensions is None:

diff --git a/mdpo/mdpo2html/__main__.py b/mdpo/mdpo2html/__main__.py
@@ -72,6 +72,9 @@ def parse_options(args):
         and opts.filepath_or_content
     ):
         filepath_or_content += opts.filepath_or_content[0]
+    if not filepath_or_content:
+        sys.stderr.write('Files or content to translate not specified\n')
+        sys.exit(1)
     opts.filepath_or_content = filepath_or_content
 
     opts.command_aliases = parse_command_aliases_cli_arguments(

diff --git a/mdpo/po2md/__init__.py b/mdpo/po2md/__init__.py
@@ -9,11 +9,7 @@
 )
 from mdpo.event import debug_events, raise_skip_event
 from mdpo.io import save_file_checking_file_changed, to_file_content_if_is_file
-from mdpo.md import (
-    MarkdownSpanWrapper,
-    escape_links_titles,
-    parse_link_references,
-)
+from mdpo.md import MarkdownSpanWrapper, parse_link_references
 from mdpo.md4c import DEFAULT_MD4C_GENERIC_PARSER_EXTENSIONS
 from mdpo.po import (
     paths_or_globs_to_unique_pofiles,
@@ -42,9 +38,7 @@ class Po2Md:
         'wrapwidth',
 
         'bold_start_string',
-        'bold_start_string_escaped',
         'bold_end_string',
-        'bold_end_string_escaped',
         'italic_start_string',
         'italic_start_string_escaped',
         'italic_end_string',
@@ -153,14 +147,7 @@ def __init__(self, pofiles, ignore=[], po_encoding=None, **kwargs):
         )
 
         self.bold_start_string = kwargs.get('bold_start_string', '**')
-        self.bold_start_string_escaped = po_escaped_string(
-            self.bold_start_string,
-        )
-
         self.bold_end_string = kwargs.get('bold_end_string', '**')
-        self.bold_end_string_escaped = po_escaped_string(
-            self.bold_end_string,
-        )
 
         self.italic_start_string = kwargs.get('italic_start_string', '*')
         self.italic_start_string_escaped = po_escaped_string(
@@ -299,7 +286,7 @@ def _process_command(self, text):
     def _escape_translation(self, text):
         if self._aimg_title_inside_current_msgid:
             # escape '"' characters inside links and image titles
-            text = escape_links_titles(text)
+            text = polib.escape(text)
         return text
 
     def _translate_msgid(self, msgid, msgctxt, tcomment):
@@ -352,6 +339,7 @@ def _save_current_msgid(self):
                     tcomment=self._current_tcomment,
                 ),
             )
+
         if self._inside_indented_codeblock:
             new_translation = ''
             for line in translation.splitlines():
@@ -388,7 +376,7 @@ def _save_current_msgid(self):
                     code_end_string_escaped=self.code_end_string_escaped,
                     wikilink_start_string=self.wikilink_start_string,
                     wikilink_end_string=self.wikilink_end_string,
-                ).wrap(self._escape_translation(translation))
+                ).wrap(translation)
 
                 if self._inside_hblock or self._inside_table:
                     translation = translation.rstrip('\n')
@@ -784,7 +772,7 @@ def leave_span(self, span, details):
             referenced_target, imgspan_title = (None, None)
             imgspan_src = details['src'][0][1]
             if details['title']:
-                imgspan_title = details['title'][0][1]
+                imgspan_title = polib.escape(details['title'][0][1])
                 for target, href, title in self._link_references:
                     if href == imgspan_src and title == imgspan_title:
                         referenced_target = target
@@ -805,13 +793,13 @@ def leave_span(self, span, details):
                     img_markup += f' "{imgspan_title}"'
                 img_markup += ')'
 
-            self._current_imgspan = {}
-
             if self._inside_aspan:
                 self._current_aspan_text += img_markup
             else:
                 self._current_msgid += img_markup
 
+            self._current_imgspan = {}
+
     def text(self, block, text):
         # raise 'text' event
         if raise_skip_event(

diff --git a/mdpo/po2md/__main__.py b/mdpo/po2md/__main__.py
@@ -47,7 +47,7 @@ def build_parser():
     )
     parser.add_argument(
         '-p', '--po-files', '--pofiles', metavar='POFILES', action='append',
-        nargs='*', dest='pofiles',
+        nargs='*', dest='pofiles', required=True,
         help='Glob matching a set of PO files from where to extract references'
              ' to make the replacements translating strings. This argument'
              ' can be passed multiple times.',
@@ -86,6 +86,9 @@ def parse_options(args):
         and opts.filepath_or_content
     ):
         filepath_or_content += opts.filepath_or_content[0]
+    if not filepath_or_content:
+        sys.stderr.write('Files or content to translate not specified\n')
+        sys.exit(1)
     opts.filepath_or_content = filepath_or_content
 
     opts.command_aliases = parse_command_aliases_cli_arguments(

diff --git a/test/test_md.py b/test/test_md.py
@@ -0,0 +1,15 @@
+import os
+
+from mdpo.md import MarkdownSpanWrapper
+
+
+def test_MarkdownSpanWrapper___slots__(class_slots):
+    slots = class_slots(MarkdownSpanWrapper)
+    assert slots
+
+    md_util_filepath = os.path.join('mdpo', 'md.py')
+    with open(md_util_filepath) as f:
+        content = f.read()
+
+    for slot in slots:
+        assert content.count(f'self.{slot}') > 1
diff --git a/test/test_md2po/extract-examples/markuptext/default-escapes.md.expect.po b/test/test_md2po/extract-examples/markuptext/default-escapes.md.expect.po
@@ -57,7 +57,7 @@ msgid ""
 "\\`\\` and `code with double backtick` markup."
 msgstr ""
 
-msgid "[this is a link](href \"title with \" characters\")"
+msgid "[this is a link](href \"title with \\\" characters\")"
 msgstr ""
 
 msgid "`\"development\"` and for production"

diff --git a/test/test_md2po/extract-examples/markuptext/images.md b/test/test_md2po/extract-examples/markuptext/images.md
@@ -8,6 +8,8 @@
 
 My ![foo bar](/path/to/train.jpg "title")
 
+My ![image with escaped title](/some/url.ext "escaped \" title")
+
 [![Image inside inline link](https://image.ext)](https://link.ext)
 
 [![Image inside referenced link](https://image.ext)][2]

diff --git a/test/test_md2po/extract-examples/markuptext/images.md.expect.po b/test/test_md2po/extract-examples/markuptext/images.md.expect.po
@@ -17,6 +17,9 @@ msgstr ""
 msgid "My ![foo bar](/path/to/train.jpg \"title\")"
 msgstr ""
 
+msgid "My ![image with escaped title](/some/url.ext \"escaped \\\" title\")"
+msgstr ""
+
 msgid "[![Image inside inline link](https://image.ext)](https://link.ext)"
 msgstr ""
 

diff --git a/test/test_md2po/extract-examples/markuptext/links.md b/test/test_md2po/extract-examples/markuptext/links.md
@@ -1,6 +1,6 @@
 My favorite PEP is [PEP 0](https://www.python.org/dev/peps/).
 
-My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").
+My favorite search engine is [Foo Bar Baz](https://foobarbaz.com "The best search engine for privacy").
 
 Visit <https://www.github.com/mondeja/mdpo>
 

diff --git a/test/test_md2po/extract-examples/markuptext/links.md.expect.po b/test/test_md2po/extract-examples/markuptext/links.md.expect.po
@@ -6,8 +6,8 @@ msgid "My favorite PEP is [PEP 0](https://www.python.org/dev/peps/)."
 msgstr ""
 
 msgid ""
-"My favorite search engine is [Duck Duck Go](https://duckduckgo.com \"The "
-"best search engine for privacy\")."
+"My favorite search engine is [Foo Bar Baz](https://foobarbaz.com \"The best "
+"search engine for privacy\")."
 msgstr ""
 
 msgid "Visit <https://www.github.com/mondeja/mdpo>"

diff --git a/test/test_md2po/extract-examples/plaintext/links.md b/test/test_md2po/extract-examples/plaintext/links.md
@@ -1,6 +1,6 @@
 My favorite PEP is [PEP 0](https://www.python.org/dev/peps/).
 
-My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").
+My favorite search engine is [Foo Bar Baz](https://foobarbaz.com "The best search engine for privacy").
 
 Visit <https://www.github.com/mondeja/md2po>
 

diff --git a/test/test_md2po/extract-examples/plaintext/links.md.expect.po b/test/test_md2po/extract-examples/plaintext/links.md.expect.po
@@ -8,7 +8,7 @@ msgstr ""
 msgid "The best search engine for privacy"
 msgstr ""
 
-msgid "My favorite search engine is Duck Duck Go."
+msgid "My favorite search engine is Foo Bar Baz."
 msgstr ""
 
 msgid "Visit https://www.github.com/mondeja/md2po"

diff --git a/test/test_md2po/test_extractor.py b/test/test_md2po/test_extractor.py
@@ -122,4 +122,4 @@ def test___slots__(class_slots):
         content = f.read()
 
     for slot in slots:
-        assert f'self.{slot}' in content
+        assert content.count(f'self.{slot}') > 1