Skip to content

Commit

Permalink
Integrate md2po-po2md tests (#195)
Browse files Browse the repository at this point in the history
* Integrate md2po-po2md tests

* Fix error escaping characters inside image titles

* Drop unneeded `__slots__` from classes
  • Loading branch information
mondeja committed Nov 15, 2021
1 parent e51a04f commit c882b08
Show file tree
Hide file tree
Showing 38 changed files with 190 additions and 213 deletions.
51 changes: 4 additions & 47 deletions mdpo/md.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Markdown related utilities for mdpo."""

import md4c
import polib

from mdpo.po import po_escaped_string
from mdpo.text import min_not_max_chars_in_a_row
Expand All @@ -11,46 +12,6 @@
)


def escape_links_titles(text, link_start_string='[', link_end_string=']'):
    r"""Escape ``"`` characters found inside link titles.

    This is used by mdpo extracting titles of links which contains Markdown
    `link titles <https://spec.commonmark.org/0.29/#link-title>`_ delimiter
    characters.

    A span is only treated as a title when it is wrapped in a matching pair
    of ``"`` or ``'`` delimiters. Any other text that happens to follow a
    link (for example ``[a](b) c (d)``) is returned untouched instead of
    being trimmed and wrapped in quotes. Rewritten titles are always emitted
    with ``"`` delimiters.

    Args:
        text (str): Text where the links titles to escape will be searched.
        link_start_string (str): String that delimits the start of a link.
        link_end_string (str): String that delimits the end of a link.

    Returns:
        str: Same text as input with escaped title delimiters characters found
        inside titles.

    Examples:
        >>> title = '[a link](href "title with characters to escape "")'
        >>> escape_links_titles(title)
        '[a link](href "title with characters to escape \\"")'

        >>> escape_links_titles('[a](b) c (d)')
        '[a](b) c (d)'
    """
    import re

    link_end_string_escaped_regex = re.escape(link_end_string)
    # group 1: link text, opening parenthesis, href and the whitespace
    #          character that follows it
    # group 2: everything up to (not including) the next ')', which is the
    #          title candidate including its delimiters
    regex = re.compile(
        r'({}[^{}]+{}\([^\s]+\s)([^\)]+)'.format(
            re.escape(link_start_string),
            link_end_string_escaped_regex,
            link_end_string_escaped_regex,
        ),
    )

    def _escape_title(match):
        candidate = match.group(2)
        # Only a span wrapped in a matching pair of quotes is a title;
        # anything else is unrelated text and must not be modified.
        is_quoted = (
            len(candidate) >= 2
            and candidate[0] in ('"', "'")
            and candidate[-1] == candidate[0]
        )
        if not is_quoted:
            return match.group(0)
        escaped_title = candidate[1:-1].replace('"', '\\"')
        return f'{match.group(1)}"{escaped_title}"'

    # A callable replacement is inserted verbatim (no backslash template
    # processing) and substitutes each match at its own position.
    return regex.sub(_escape_title, text)


def parse_link_references(content):
"""Parses link references found in a Markdown content.
Expand Down Expand Up @@ -94,8 +55,6 @@ class MarkdownSpanWrapper:
'code_start_string_escaped',
'code_end_string',
'code_end_string_escaped',
'link_start_string',
'link_end_string',
'wikilink_start_string',
'wikilink_end_string',

Expand Down Expand Up @@ -132,8 +91,6 @@ def __init__(
self.italic_end_string = kwargs.get('italic_end_string', '*')
self.code_start_string = kwargs.get('code_start_string', '`')[0]
self.code_end_string = kwargs.get('code_end_string', '`')[0]
self.link_start_string = kwargs.get('link_start_string', '[')
self.link_end_string = kwargs.get('link_end_string', ']')
self.wikilink_start_string = kwargs.get('wikilink_start_string', '[[')
self.wikilink_end_string = kwargs.get('wikilink_end_string', ']]')

Expand Down Expand Up @@ -176,7 +133,7 @@ def enter_span(self, span, details):
self._inside_codespan = True
self._current_line += self.code_start_string
elif span is md4c.SpanType.A:
self._current_line += self.link_start_string
self._current_line += '['
self._current_aspan_href = details['href'][0][1]
self._current_aspan_title = (
details['title'][0][1] if details['title'] else None
Expand All @@ -200,7 +157,7 @@ def leave_span(self, span, details):
self._current_line += f']({self._current_aspan_href}'
if self._current_aspan_title:
self._current_line += (
f' "{escape_links_titles(self._current_aspan_title)}"'
f' "{polib.escape(self._current_aspan_title)}"'
)
self._current_line += ')'
self._current_aspan_href = False
Expand All @@ -218,7 +175,7 @@ def leave_span(self, span, details):
self._current_line += f']({src}'
if details['title']:
title = details['title'][0][1]
self._current_line += f' "{escape_links_titles(title)}"'
self._current_line += f' "{polib.escape(title)}"'
self._current_line += ')'

def text(self, block, text):
Expand Down
15 changes: 3 additions & 12 deletions mdpo/md2po/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,7 @@ class Md2Po:
'_leavespan_replacer',

'bold_start_string',
'bold_start_string_escaped',
'bold_end_string',
'bold_end_string_escaped',
'italic_start_string',
'italic_start_string_escaped',
'italic_end_string',
Expand Down Expand Up @@ -185,14 +183,7 @@ def __init__(self, files_or_content, **kwargs):

if not self.plaintext:
self.bold_start_string = kwargs.get('bold_start_string', '**')
self.bold_start_string_escaped = po_escaped_string(
self.bold_start_string,
)

self.bold_end_string = kwargs.get('bold_end_string', '**')
self.bold_end_string_escaped = po_escaped_string(
self.bold_end_string,
)

self.italic_start_string = kwargs.get('italic_start_string', '*')
self.italic_start_string_escaped = po_escaped_string(
Expand Down Expand Up @@ -769,10 +760,10 @@ def not_plaintext_leave_span(self, span, details):
# autolink vs link clash (see implementation notes)
self._current_msgid += f'<{self._current_aspan_text}'
if title:
self._current_msgid += f' "{title}"'
self._current_msgid += f' "{polib.escape(title)}"'
self._current_msgid += '>'
else:
title_part = f' "{title}"' if title else ''
title_part = f' "{polib.escape(title)}"' if title else ''
href = details['href'][0][1]
self._current_msgid += (
f'[{self._current_aspan_text}]({href}{title_part})'
Expand Down Expand Up @@ -815,7 +806,7 @@ def not_plaintext_leave_span(self, span, details):
else:
img_markup += f'({imgspan_src}'
if imgspan_title:
img_markup += f' "{imgspan_title}"'
img_markup += f' "{polib.escape(imgspan_title)}"'
img_markup += ')'

self._current_imgspan = {}
Expand Down
8 changes: 6 additions & 2 deletions mdpo/md2po/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,17 @@ def parse_options(args=[]):
sys.exit(1)
opts, unknown = parser.parse_known_args(args)

files_or_content = ''
if not sys.stdin.isatty():
files_or_content = sys.stdin.read().strip('\n')
files_or_content += sys.stdin.read().strip('\n')
if isinstance(opts.files_or_content, list) and opts.files_or_content:
if len(opts.files_or_content) == 1:
files_or_content = opts.files_or_content[0]
files_or_content += opts.files_or_content[0]
else:
files_or_content = opts.files_or_content
if not files_or_content:
sys.stderr.write('Files or content to extract not specified\n')
sys.exit(1)
opts.files_or_content = files_or_content

if opts.extensions is None:
Expand Down
8 changes: 5 additions & 3 deletions mdpo/md2po2md/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def build_parser():
'-l', '--lang', dest='langs', default=[], action='append',
help='Language codes used to create the output directories.'
' This argument can be passed multiple times.',
metavar='LANG',
metavar='LANG', required=True,
)

output_paths_schema_help = '' if SPHINX_IS_RUNNING else (
Expand All @@ -56,8 +56,7 @@ def build_parser():
' example.'
)
parser.add_argument(
'-o', '--output', dest='output_paths_schema',
required=True, type=str,
'-o', '--output', dest='output_paths_schema', required=True, type=str,
help='Path schema for outputs, built using placeholders. There is a'
' mandatory placeholder for languages: {lang};and one optional'
f' for output basename: {{basename}}.{output_paths_schema_help}'
Expand Down Expand Up @@ -96,6 +95,9 @@ def parse_options(args=[]):
input_paths_glob += sys.stdin.read().strip('\n')
if isinstance(opts.input_paths_glob, list) and opts.input_paths_glob:
input_paths_glob += opts.input_paths_glob[0]
if not input_paths_glob:
sys.stderr.write('Files or content to translate not specified\n')
sys.exit(1)
opts.input_paths_glob = input_paths_glob

if opts.extensions is None:
Expand Down
3 changes: 3 additions & 0 deletions mdpo/mdpo2html/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def parse_options(args):
and opts.filepath_or_content
):
filepath_or_content += opts.filepath_or_content[0]
if not filepath_or_content:
sys.stderr.write('Files or content to translate not specified\n')
sys.exit(1)
opts.filepath_or_content = filepath_or_content

opts.command_aliases = parse_command_aliases_cli_arguments(
Expand Down
26 changes: 7 additions & 19 deletions mdpo/po2md/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
)
from mdpo.event import debug_events, raise_skip_event
from mdpo.io import save_file_checking_file_changed, to_file_content_if_is_file
from mdpo.md import (
MarkdownSpanWrapper,
escape_links_titles,
parse_link_references,
)
from mdpo.md import MarkdownSpanWrapper, parse_link_references
from mdpo.md4c import DEFAULT_MD4C_GENERIC_PARSER_EXTENSIONS
from mdpo.po import (
paths_or_globs_to_unique_pofiles,
Expand Down Expand Up @@ -42,9 +38,7 @@ class Po2Md:
'wrapwidth',

'bold_start_string',
'bold_start_string_escaped',
'bold_end_string',
'bold_end_string_escaped',
'italic_start_string',
'italic_start_string_escaped',
'italic_end_string',
Expand Down Expand Up @@ -153,14 +147,7 @@ def __init__(self, pofiles, ignore=[], po_encoding=None, **kwargs):
)

self.bold_start_string = kwargs.get('bold_start_string', '**')
self.bold_start_string_escaped = po_escaped_string(
self.bold_start_string,
)

self.bold_end_string = kwargs.get('bold_end_string', '**')
self.bold_end_string_escaped = po_escaped_string(
self.bold_end_string,
)

self.italic_start_string = kwargs.get('italic_start_string', '*')
self.italic_start_string_escaped = po_escaped_string(
Expand Down Expand Up @@ -299,7 +286,7 @@ def _process_command(self, text):
def _escape_translation(self, text):
if self._aimg_title_inside_current_msgid:
# escape '"' characters inside links and image titles
text = escape_links_titles(text)
text = polib.escape(text)
return text

def _translate_msgid(self, msgid, msgctxt, tcomment):
Expand Down Expand Up @@ -352,6 +339,7 @@ def _save_current_msgid(self):
tcomment=self._current_tcomment,
),
)

if self._inside_indented_codeblock:
new_translation = ''
for line in translation.splitlines():
Expand Down Expand Up @@ -388,7 +376,7 @@ def _save_current_msgid(self):
code_end_string_escaped=self.code_end_string_escaped,
wikilink_start_string=self.wikilink_start_string,
wikilink_end_string=self.wikilink_end_string,
).wrap(self._escape_translation(translation))
).wrap(translation)

if self._inside_hblock or self._inside_table:
translation = translation.rstrip('\n')
Expand Down Expand Up @@ -784,7 +772,7 @@ def leave_span(self, span, details):
referenced_target, imgspan_title = (None, None)
imgspan_src = details['src'][0][1]
if details['title']:
imgspan_title = details['title'][0][1]
imgspan_title = polib.escape(details['title'][0][1])
for target, href, title in self._link_references:
if href == imgspan_src and title == imgspan_title:
referenced_target = target
Expand All @@ -805,13 +793,13 @@ def leave_span(self, span, details):
img_markup += f' "{imgspan_title}"'
img_markup += ')'

self._current_imgspan = {}

if self._inside_aspan:
self._current_aspan_text += img_markup
else:
self._current_msgid += img_markup

self._current_imgspan = {}

def text(self, block, text):
# raise 'text' event
if raise_skip_event(
Expand Down
5 changes: 4 additions & 1 deletion mdpo/po2md/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def build_parser():
)
parser.add_argument(
'-p', '--po-files', '--pofiles', metavar='POFILES', action='append',
nargs='*', dest='pofiles',
nargs='*', dest='pofiles', required=True,
help='Glob matching a set of PO files from where to extract references'
' to make the replacements translating strings. This argument'
' can be passed multiple times.',
Expand Down Expand Up @@ -86,6 +86,9 @@ def parse_options(args):
and opts.filepath_or_content
):
filepath_or_content += opts.filepath_or_content[0]
if not filepath_or_content:
sys.stderr.write('Files or content to translate not specified\n')
sys.exit(1)
opts.filepath_or_content = filepath_or_content

opts.command_aliases = parse_command_aliases_cli_arguments(
Expand Down
15 changes: 15 additions & 0 deletions test/test_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import os

from mdpo.md import MarkdownSpanWrapper


def test_MarkdownSpanWrapper___slots__(class_slots):
    """Check that each ``MarkdownSpanWrapper`` slot is referenced in its module.

    Every slot name must appear as ``self.<slot>`` more than once in the
    text of ``mdpo/md.py``.
    """
    declared_slots = class_slots(MarkdownSpanWrapper)
    assert declared_slots

    # relative path, resolved against the current working directory
    with open(os.path.join('mdpo', 'md.py')) as source_file:
        module_source = source_file.read()

    for slot_name in declared_slots:
        assert module_source.count(f'self.{slot_name}') > 1
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ msgid ""
"\\`\\` and `code with double backtick` markup."
msgstr ""

msgid "[this is a link](href \"title with \" characters\")"
msgid "[this is a link](href \"title with \\\" characters\")"
msgstr ""

msgid "`\"development\"` and for production"
Expand Down
2 changes: 2 additions & 0 deletions test/test_md2po/extract-examples/markuptext/images.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

My ![foo bar](/path/to/train.jpg "title")

My ![image with escaped title](/some/url.ext "escaped \" title")

[![Image inside inline link](https://image.ext)](https://link.ext)

[![Image inside referenced link](https://image.ext)][2]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ msgstr ""
msgid "My ![foo bar](/path/to/train.jpg \"title\")"
msgstr ""

msgid "My ![image with escaped title](/some/url.ext \"escaped \\\" title\")"
msgstr ""

msgid "[![Image inside inline link](https://image.ext)](https://link.ext)"
msgstr ""

Expand Down
2 changes: 1 addition & 1 deletion test/test_md2po/extract-examples/markuptext/links.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
My favorite PEP is [PEP 0](https://www.python.org/dev/peps/).

My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").
My favorite search engine is [Foo Bar Baz](https://foobarbaz.com "The best search engine for privacy").

Visit <https://www.github.com/mondeja/mdpo>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ msgid "My favorite PEP is [PEP 0](https://www.python.org/dev/peps/)."
msgstr ""

msgid ""
"My favorite search engine is [Duck Duck Go](https://duckduckgo.com \"The "
"best search engine for privacy\")."
"My favorite search engine is [Foo Bar Baz](https://foobarbaz.com \"The best "
"search engine for privacy\")."
msgstr ""

msgid "Visit <https://www.github.com/mondeja/mdpo>"
Expand Down
2 changes: 1 addition & 1 deletion test/test_md2po/extract-examples/plaintext/links.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
My favorite PEP is [PEP 0](https://www.python.org/dev/peps/).

My favorite search engine is [Duck Duck Go](https://duckduckgo.com "The best search engine for privacy").
My favorite search engine is [Foo Bar Baz](https://foobarbaz.com "The best search engine for privacy").

Visit <https://www.github.com/mondeja/md2po>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ msgstr ""
msgid "The best search engine for privacy"
msgstr ""

msgid "My favorite search engine is Duck Duck Go."
msgid "My favorite search engine is Foo Bar Baz."
msgstr ""

msgid "Visit https://www.github.com/mondeja/md2po"
Expand Down
2 changes: 1 addition & 1 deletion test/test_md2po/test_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,4 @@ def test___slots__(class_slots):
content = f.read()

for slot in slots:
assert f'self.{slot}' in content
assert content.count(f'self.{slot}') > 1

0 comments on commit c882b08

Please sign in to comment.