Skip to content

Commit

Permalink
Merge pull request #74 from transifex/yaml_parser
Browse files Browse the repository at this point in the history
Version 2 of GithubMarkdownHandler using an actual yaml parser
  • Loading branch information
SofiaMargariti committed Aug 31, 2017
2 parents b0e4c07 + bfe3d2c commit 6ce81a8
Show file tree
Hide file tree
Showing 11 changed files with 1,165 additions and 3 deletions.
4 changes: 2 additions & 2 deletions bin/create_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

from openformats.formats import (
plaintext, srt, android, json, po, github_markdown)
plaintext, srt, android, json, po, github_markdown_v2)
from openformats.tests.utils import translate_stringset

args = argparse.ArgumentParser
Expand All @@ -30,7 +30,7 @@ def get_handler(ext):
'xml': android.AndroidHandler(),
'json': json.JsonHandler(),
'po': po.PoHandler(),
'md': github_markdown.GithubMarkdownHandler(),
'md': github_markdown_v2.GithubMarkdownHandlerV2(),
}[ext]


Expand Down
114 changes: 114 additions & 0 deletions openformats/formats/github_markdown_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from __future__ import absolute_import
import re

from mistune import Markdown

from openformats.formats.github_markdown import TxBlockLexer, string_handler
from openformats.formats.yaml import YamlHandler
from ..handlers import Handler
from ..strings import OpenString
from ..utils.compilers import OrderedCompilerMixin
from ..transcribers import Transcriber


class GithubMarkdownHandlerV2(OrderedCompilerMixin, Handler):
    """Handler for Github markdown files with an optional YAML header.

    A leading block delimited by ``---`` lines is parsed with
    ``YamlHandler``; the rest of the document is tokenised by mistune through
    ``TxBlockLexer``. Strings extracted from the markdown body get numeric
    keys (their order), which is how ``compile`` tells them apart from the
    strings that came from the YAML header.
    """

    name = "Github_Markdown_v2"
    extension = "md"
    EXTRACTS_RAW = False

    BACKSLASH = u'\\'
    DOUBLE_QUOTES = u'"'
    NEWLINE = u'\n'

    def compile(self, template, stringset, **kwargs):
        """Fill the placeholders of ``template`` with the given strings.

        :param template: the template produced by ``parse``.
        :param stringset: iterable of string objects, assumed to be ordered
            the same way their placeholders appear in the template.
        :returns: the compiled document.
        :raises ValueError: if a string's placeholder is not present in the
            template.
        """
        transcriber = Transcriber(template)
        template = transcriber.source

        for string in stringset:
            tr_string = string.string
            try:
                # A key that converts to int marks a markdown string, which
                # is written back verbatim.
                int(string.key)
            except ValueError:
                # Any other key belongs to a string from the YAML header.
                tr_string = self._prepare_yaml_string(string)

            placeholder = string.template_replacement
            hash_position = template.index(placeholder)
            transcriber.copy_until(hash_position)
            transcriber.add(tr_string)
            transcriber.skip(len(placeholder))

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

    def _prepare_yaml_string(self, string):
        """Return the text of a YAML header string, ready to be written."""
        tr_string = string.string

        # A newline anywhere but in the last position means the value spans
        # several lines: escape the double quotes it contains and surround
        # it with double quotes.
        if self.NEWLINE in tr_string[:-1]:
            tr_string = tr_string.replace(
                self.DOUBLE_QUOTES,
                self.BACKSLASH + self.DOUBLE_QUOTES
            )
            tr_string = self.DOUBLE_QUOTES + tr_string + self.DOUBLE_QUOTES

        # This is to ensure that if the style is literal or folded
        # http://www.yaml.org/spec/1.2/spec.html#id2795688
        # a new line always follows the string. `endswith` is used instead
        # of indexing so that an empty translation does not raise IndexError.
        if (string.flags and string.flags in '|>' and
                not tr_string.endswith(self.NEWLINE)):
            tr_string = tr_string + self.NEWLINE

        return tr_string

    def parse(self, content, **kwargs):
        """Extract the translatable strings of a markdown document.

        :param content: the text of the markdown file.
        :returns: a ``(template, stringset)`` tuple; the template is the
            normalised content with every extracted string replaced by its
            placeholder, the stringset holds the YAML header strings
            followed by the markdown strings.
        """
        # mistune expands tabs to 4 spaces and blanks out lines made only of
        # spaces, so we need to do the same in order to be able to match the
        # extracted substrings against the text we turn into a template.
        # The normalised text must be the one used from here on; matching
        # against the raw input would defeat the purpose of this step.
        pattern = re.compile(r'^ +$', re.M)
        content = pattern.sub('', content.expandtabs(4))

        yaml_stringset = []
        yaml_template = ''
        separator = ''
        md_content = content

        yml_header = re.match(r'^(---\s+)([\s\S]*?[^`]\s*)(\n---\s+)(?!-)',
                              content)
        if yml_header:
            # Groups 1 and 2 (opening delimiter and header body) go to the
            # YAML parser; group 3 (closing delimiter) is kept verbatim.
            yaml_header_content = ''.join(yml_header.group(1, 2))
            separator = yml_header.group(3)
            # The match is anchored at 0 and the three groups are
            # contiguous, so the markdown body starts where group 3 ends.
            md_content = content[yml_header.end(3):]
            yaml_stringset, yaml_template = YamlHandler().parse(
                yaml_header_content)

        md_template = md_content

        block = TxBlockLexer()
        markdown = Markdown(block=block)

        # Make sure the lexer's stringset starts empty; it is filled as a
        # side effect of running `markdown` (which may recurse).
        block.md_stringset = []

        # Command that populates block.md_stringset
        markdown(md_content)

        stringset = list(yaml_stringset)
        order = len(stringset)
        # End of the last placeholder written in md_template; searching only
        # past it keeps repeated strings mapped to successive occurrences.
        curr_pos = 0
        for string in block.md_stringset:
            string = string_handler(string, md_template)
            if not string:
                continue

            position = md_template.find(string, curr_pos)
            if position == -1:
                # The text is not in the remaining template. Registering it
                # would yield a string without a placeholder (making
                # `compile` fail) and would corrupt the cursor, so skip it.
                continue

            string_object = OpenString(str(order), string, order=order)
            order += 1
            stringset.append(string_object)

            placeholder = string_object.template_replacement
            md_template = (
                md_template[:position] + placeholder +
                md_template[position + len(string):]
            )
            curr_pos = position + len(placeholder)

        template = yaml_template + separator + md_template
        return template, stringset
Loading

0 comments on commit 6ce81a8

Please sign in to comment.