/
github_markdown_v2.py
132 lines (110 loc) · 4.83 KB
/
github_markdown_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from __future__ import absolute_import
import re
from mistune import Markdown
from openformats.formats.github_markdown import TxBlockLexer, string_handler
from openformats.formats.yaml import YamlHandler
from ..handlers import Handler
from ..strings import OpenString
from ..utils.compilers import OrderedCompilerMixin
from ..utils.newlines import find_newline_type, force_newline_type
from ..transcribers import Transcriber
class GithubMarkdownHandlerV2(OrderedCompilerMixin, Handler):
    """Handler for Markdown files that may start with a YAML header.

    A leading block delimited by ``---`` lines is handed to ``YamlHandler``;
    the rest of the document is tokenized with mistune through
    ``TxBlockLexer``. Strings coming from the Markdown body are keyed by
    their integer position (as text), which is how ``compile`` tells them
    apart from the strings extracted from the YAML header.
    """

    name = "Github_Markdown_v2"
    extension = "md"
    EXTRACTS_RAW = False

    BACKSLASH = u'\\'
    DOUBLE_QUOTES = u'"'
    NEWLINE = u'\n'
    COLON = u':'
    ASTERISK = u'*'
    AMPERSAND = u'&'
    DASH = u'-'

    # Lines that consist of spaces only (see the mistune note in `parse`).
    _BLANK_LINE_RE = re.compile(r'^ +$', re.M)
    # Groups: (1) opening delimiter, (2) header body, (3) closing delimiter.
    _YAML_HEADER_RE = re.compile(
        r'^(---\s+)([\s\S]*?[^`]\s*)(\n---\s+)(?!-)'
    )

    def _should_wrap_in_quotes(self, tr_string):
        """Tell whether a YAML value has to be emitted inside double quotes.

        This is the case when the string contains a newline anywhere but in
        its last position, contains a colon, or, once leading whitespace is
        ignored, starts with an asterisk, an ampersand or a dash.

        :param tr_string: the translation text to inspect.
        :returns: ``True`` if the text should be quoted, ``False`` otherwise.
        """
        stripped = tr_string.lstrip()
        return (
            self.NEWLINE in tr_string[:-1]
            or self.COLON in tr_string
            or stripped.startswith(
                (self.ASTERISK, self.AMPERSAND, self.DASH)
            )
        )

    def _prepare_yaml_translation(self, string):
        """Return the text to write into the template for a YAML string.

        :param string: object exposing ``string`` (the translation text)
            and ``flags``.
        :returns: the translation, quoted and/or newline-terminated when
            needed.
        """
        tr_string = string.string
        if self._should_wrap_in_quotes(tr_string):
            # escape double quotes inside strings
            # NOTE(review): only double quotes are escaped here; backslashes
            # already present in the text are left untouched - confirm that
            # this is intended for double-quoted YAML scalars.
            tr_string = tr_string.replace(
                self.DOUBLE_QUOTES, self.BACKSLASH + self.DOUBLE_QUOTES
            )
            # surround string with double quotes
            tr_string = self.DOUBLE_QUOTES + tr_string + self.DOUBLE_QUOTES

        # this is to ensure that if the style is literal or folded
        # http://www.yaml.org/spec/1.2/spec.html#id2795688
        # a new line always follows the string
        # `endswith` (rather than indexing the last character) keeps an
        # empty translation from raising IndexError.
        if (string.flags and string.flags in '|>' and
                not tr_string.endswith(self.NEWLINE)):
            tr_string += self.NEWLINE
        return tr_string

    def compile(self, template, stringset, **kwargs):
        """Fill the template's placeholders with the given translations.

        :param template: the template text, as produced by ``parse``.
        :param stringset: the strings to insert; they are assumed to appear
            in the same order as their placeholders in the template.
        :returns: the compiled document.
        :raises ValueError: if a string's placeholder is missing from the
            template (raised by ``str.index``).
        """
        # assume stringset is ordered within the template
        transcriber = Transcriber(template)
        template = transcriber.source

        for string in stringset:
            tr_string = string.string
            try:
                # if string's key is int this is a markdown string
                int(string.key)
            except ValueError:
                # any other key belongs to the YAML header
                tr_string = self._prepare_yaml_translation(string)

            placeholder = string.template_replacement
            transcriber.copy_until(template.index(placeholder))
            transcriber.add(tr_string)
            transcriber.skip(len(placeholder))

        transcriber.copy_until(len(template))
        return transcriber.get_destination()

    def parse(self, content, **kwargs):
        """Extract the translatable strings and build the template.

        :param content: the source document.
        :returns: a ``(template, stringset)`` tuple; the template uses the
            same newline type that was detected in ``content``.
        """
        newline_type = find_newline_type(content)
        if newline_type == 'DOS':
            content = force_newline_type(content, 'UNIX')

        # mistune expands tabs to 4 spaces and trims trailing spaces, so we
        # need to do the same in order to be able to match the substrings
        content = self._BLANK_LINE_RE.sub('', content.expandtabs(4))

        yaml_template = ''
        yaml_stringset = []
        separator = ''
        md_content = content

        yml_header = self._YAML_HEADER_RE.match(content)
        if yml_header:
            yaml_header_content = ''.join(yml_header.group(1, 2))
            separator = yml_header.group(3)
            md_content = content[
                len(yaml_header_content) + len(separator):
            ]
            # Handlers return (template, stringset), exactly like the
            # return statement at the end of this method.
            yaml_template, yaml_stringset = YamlHandler().parse(
                yaml_header_content)

        md_template = md_content

        block = TxBlockLexer()
        markdown = Markdown(block=block)

        # Making sure stringset is empty because of recursive inside `markdown`
        block.md_stringset = []

        # Command that populates block.md_stringset
        markdown(md_content)

        # Header strings come first; markdown strings are numbered after.
        stringset = list(yaml_stringset)
        order = len(stringset)
        curr_pos = 0
        for token in block.md_stringset:
            string = string_handler(token, md_template)
            if not string:
                continue

            string_object = OpenString(str(order), string, order=order)
            order += 1
            stringset.append(string_object)

            placeholder = string_object.template_replacement
            # Only replace past the previous placeholder, so that repeated
            # strings are matched in document order.
            md_template = (
                md_template[:curr_pos] +
                md_template[curr_pos:].replace(string, placeholder, 1)
            )
            # Keep track of the index of the last replaced hash
            # NOTE(review): if `string` does not occur after `curr_pos`,
            # nothing is replaced, `find` returns -1 and the cursor moves
            # back near the start of the template - confirm whether
            # `string_handler` rules this case out.
            curr_pos = md_template.find(placeholder) + len(placeholder)

        template = yaml_template + separator + md_template
        return force_newline_type(template, newline_type), stringset