Merge branch 'donot-change-case'

* donot-change-case: (34 commits) Modify README according to @jonasbn's proposal Update changes Add test for 'lowercase' param Cleanup Update README Add 'lowercase' parameter Prepare for 2.6.0 Add 'Maintain the images in headings' section on README Change param name Recovering TestDefault Remove 'test_' prefix from test files Divide test files with each attributes Fix regex Cleanup - Fix same name function - Fix don't remove image in codeblock Revert "Allow list in args of log" Allow list in args of log Cleanup - Use uppercase in patterns - Cleanup global consts Unify to function escape_brackets ...
naokazuterada · Sep 26, 2017 · 09ec4d8 · 09ec4d8
2 parents f7a9767 + f57428a
commit 09ec4d8
Show file tree

Hide file tree

Showing 20 changed files with 915 additions and 512 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,11 @@
 Changes in MarkdownTOC
 ===========================
 
+## 2.6.0
+
+- Add `remove_image` parameter. Ref: #43
+- Add `lowercase` parameter. Ref: #40
+
 ## 2.5.0
 
 - Add `Customizable list bullets` feature

diff --git a/MarkdownTOC.py b/MarkdownTOC.py
@@ -11,17 +11,13 @@
 # for dbug
 pp = pprint.PrettyPrinter(indent=4)
 
-pattern_reference_link = re.compile(r'\[.+?\]$') # [Heading][my-id]
-pattern_link = re.compile(r'\[(.+?)\]\(.+?\)')  # [link](http://www.sample.com/)
-pattern_ex_id = re.compile(r'\{#.+?\}$')         # [Heading]{#my-id}
-pattern_tag = re.compile(r'<.*?>')
-pattern_anchor = re.compile(r'<a\s+name="[^"]+"\s*>\s*</a>')
-pattern_toc_tag_start = re.compile(r'<!-- *')
-pattern_toc_tag_end = re.compile(r'-->')
+PATTERN_REFERENCE_LINK = re.compile(r'\[.+?\]$') # [Heading][my-id]
+PATTERN_IMAGE = re.compile(r'!\[([^\]]+)\]\([^\)]+\)') # ![alt](path/to/image.png)
+PATTERN_EX_ID = re.compile(r'\{#.+?\}$')         # [Heading]{#my-id}
+PATTERN_TAG = re.compile(r'<.*?>')
+PATTERN_ANCHOR = re.compile(r'<a\s+name="[^"]+"\s*>\s*</a>')
+PATTERN_TOC_TAG_START = re.compile(r'<!-- *')
 
-pattern_h1_h2_equal_dash = "^.*?(?:(?:\r\n)|\n|\r)(?:-+|=+)$"
-
-TOCTAG_START = "<!-- MarkdownTOC -->"
 TOCTAG_END = "<!-- /MarkdownTOC -->"
 
 class MarkdowntocInsert(sublime_plugin.TextCommand):
@@ -34,7 +30,7 @@ def run(self, edit):
                 attrs = self.get_settings()
 
                 # add TOCTAG
-                toc = TOCTAG_START + "\n"
+                toc = "<!-- MarkdownTOC -->\n"
                 toc += "\n"
                 toc += self.get_toc(attrs, sel.end(), edit)
                 toc += "\n"
@@ -105,16 +101,40 @@ def find_tag_and_insert(self, edit):
         return False
 
     def escape_brackets(self, _text):
-        is_in_code = False
-        text = ''
-        for char in _text:
-            if char in ['(', ')', '[', ']'] and not is_in_code:
-                text += '\\' + char
-            else:
-                text += char
-            if char == '`':
-                is_in_code = not is_in_code
-        return text
+        # Escape brackets which not in image and codeblock
+
+        def do_escape(_text, _pattern, _open, _close):
+            images = []
+            brackets = []
+            codes = []
+            for m in re.compile(r'`[^`]*`').finditer(_text):
+                codes.append([m.start(), m.end()])
+            def not_in_codeblock(target):
+                return not within_ranges(target, codes)
+            def not_in_image(target):
+                return not within_ranges(target, images)
+            # Collect images not in codeblock
+            for m in PATTERN_IMAGE.finditer(_text):
+                images.append([m.start(), m.end()])
+            images = list(filter(not_in_codeblock, images))
+            # Collect brackets not in image tags
+            for m in _pattern.finditer(_text):
+                brackets.append([m.start(), m.end()])
+            brackets = list(filter(not_in_image, brackets))
+            brackets = list(filter(not_in_codeblock, brackets))
+            brackets = list(map((lambda x: x[0]), brackets))
+            # Escape brackets
+            def replace_brackets(m):
+                if m.start() in brackets:
+                    return _open+m.group(1)+_close
+                else:
+                    return m.group(0)
+            return re.sub(_pattern, replace_brackets, _text)
+
+        _text = do_escape(_text, re.compile(r'\[([^\]]*)\]'), '\[', '\]')
+        _text = do_escape(_text, re.compile(r'\(([^\)]*)\)'), '\(', '\)')
+
+        return _text
 
     # TODO: add "end" parameter
     def get_toc(self, attrs, begin, edit):
@@ -173,7 +193,9 @@ def heading_to_id(heading):
             elif attrs['markdown_preview'] == 'markdown':
                 return slugify(heading, '-')
             else:
-                if strtobool(attrs['lowercase_only_ascii']):
+                if not strtobool(attrs['lowercase']):
+                    _id = heading
+                elif strtobool(attrs['lowercase_only_ascii']):
                     # only ascii
                     _id = ''.join(chr(ord(x)+('A'<=x<='Z')*32) for x in heading)
                 else:
@@ -192,8 +214,9 @@ def replace_strings_in_id(_str):
 
         # Search headings in docment
         pattern_hash = "^#+?[^#]"
-        headings = self.view.find_all(
-            "%s|%s" % (pattern_h1_h2_equal_dash, pattern_hash))
+        pattern_h1_h2_equal_dash = "^.*?(?:(?:\r\n)|\n|\r)(?:-+|=+)$"
+        pattern_heading = "%s|%s" % (pattern_h1_h2_equal_dash, pattern_hash)
+        headings = self.view.find_all(pattern_heading)
 
         headings = self.remove_items_in_codeblock(headings)
 
@@ -238,19 +261,42 @@ def replace_strings_in_id(_str):
         toc = ''
         _ids = []
         level_counters = [0]
+        remove_image = strtobool(attrs['remove_image'])
         list_bullets = attrs['list_bullets']
 
-
         for item in items:
             _id = None
             _indent = item[0] - 1
             _text = item[1]
+            if remove_image:
+                # Remove markdown image which not in codeblock
+                images = []
+                codes = []
+                for m in re.compile(r'`[^`]*`').finditer(_text):
+                    codes.append([m.start(), m.end()])
+                def not_in_codeblock(_target):
+                    return not within_ranges(_target, codes)
+                # Collect images not in codeblock
+                for m in PATTERN_IMAGE.finditer(_text):
+                    images.append([m.start(), m.end()])
+                images = list(filter(not_in_codeblock, images))
+                images = list(map((lambda x: x[0]), images))
+                def _replace(m):
+                    if m.start() in images:
+                        return ''
+                    else:
+                        return m.group(0)
+                _text = re.sub(PATTERN_IMAGE, _replace, _text)
+
             _list_bullet = list_bullets[_indent%len(list_bullets)]
-            _text = pattern_tag.sub('', _text) # remove html tags
-            _text = _text.rstrip() # remove end space
+            _text = PATTERN_TAG.sub('', _text) # remove html tags
+            _text = _text.strip() # remove start and end spaces
 
             # Ignore links: e.g. '[link](http://sample.com/)' -> 'link'
-            _text = pattern_link.sub('\\1', _text)
+            link = re.compile(r'([^!])\[([^\]]+)\]\([^\)]+\)') # this is [link](http://www.sample.com/)
+            _text = link.sub('\\1\\2', _text)
+            beginning_link = re.compile(r'^\[([^\]]+)\]\([^\)]+\)') # [link](http://www.sample.com/) link in the beginning of line
+            _text = beginning_link.sub('\\1', _text)
 
             # Add indent
             for i in range(_indent):
@@ -260,10 +306,10 @@ def replace_strings_in_id(_str):
                 toc += _prefix
 
             # Reference-style links: e.g. '# heading [my-anchor]'
-            list_reference_link = list(pattern_reference_link.finditer(_text))
+            list_reference_link = list(PATTERN_REFERENCE_LINK.finditer(_text))
 
             # Markdown-Extra special attribute style: e.g. '# heading {#my-anchor}'
-            match_ex_id = pattern_ex_id.search(_text)
+            match_ex_id = PATTERN_EX_ID.search(_text)
 
             if len(list_reference_link):
                 match = list_reference_link[-1]
@@ -309,7 +355,7 @@ def update_anchors(self, edit, items, autoanchor):
         # Iterate in reverse so that inserts don't affect the position
         for item in reversed(items):
             anchor_region = v.line(item[2] - 1)  # -1 to get to previous line
-            is_update = pattern_anchor.match(v.substr(anchor_region))
+            is_update = PATTERN_ANCHOR.match(v.substr(anchor_region))
             if autoanchor:
                 if is_update:
                     new_anchor = '<a name="{0}"></a>'.format(item[3])
@@ -333,8 +379,10 @@ def get_settings(self):
             "autolink":             self.get_setting('default_autolink'),
             "bracket":              self.get_setting('default_bracket'),
             "depth":                self.get_setting('default_depth'),
+            "remove_image":         self.get_setting('default_remove_image'),
             "indent":               self.get_setting('default_indent'),
             "list_bullets":         self.get_setting('default_list_bullets'),
+            "lowercase":            self.get_setting('default_lowercase'),
             "lowercase_only_ascii": self.get_setting('default_lowercase_only_ascii'),
             "style":                self.get_setting('default_style'),
             "uri_encoding":         self.get_setting('default_uri_encoding'),
@@ -344,8 +392,8 @@ def get_settings(self):
     def get_attibutes_from(self, tag_str):
         """return dict of settings from tag_str"""
 
-        tag_str_html = pattern_toc_tag_start.sub("<", tag_str)
-        tag_str_html = pattern_toc_tag_start.sub(">", tag_str_html)
+        tag_str_html = PATTERN_TOC_TAG_START.sub("<", tag_str)
+        tag_str_html = PATTERN_TOC_TAG_START.sub(">", tag_str_html)
 
         soup = BeautifulSoup(tag_str_html, "html.parser")
 
@@ -413,7 +461,15 @@ def strtobool(val):
     else:
         return bool(val)
 
-
def within_ranges(target, ranges):
    """Return True if ``target`` lies entirely inside any of ``ranges``.

    ``target`` and every item of ``ranges`` are two-item ``[begin, end]``
    sequences. Bounds are compared inclusively, so a target that exactly
    matches a range counts as being within it.

    The earlier implementation compared the target's begin against the
    range twice and never looked at the target's end, so a span that only
    *started* inside a range was reported as contained.
    """
    target_begin, target_end = target[0], target[1]
    # Local names deliberately avoid ``re`` so the regex module is not shadowed.
    return any(
        range_[0] <= target_begin and target_end <= range_[1]
        for range_ in ranges
    )
 # Search and refresh if it's exist
 
 

diff --git a/MarkdownTOC.sublime-settings b/MarkdownTOC.sublime-settings
@@ -4,7 +4,9 @@
   "default_bracket": "square",
   "default_depth": 2,
   "default_indent": "\t",
+  "default_remove_image": true,
   "default_list_bullets": "-",
+  "default_lowercase": true,
   "default_lowercase_only_ascii": true,
   "default_style": "unordered",
   "default_uri_encoding": true,

diff --git a/README.md b/README.md
@@ -20,13 +20,15 @@ Sublime Text 3 plugin for generating a Table of Contents (TOC) in a Markdown doc
     - [Auto anchoring when heading has anchor defined](#auto-anchoring-when-heading-has-anchor-defined)
     - [Auto linking for _clickable_ TOC](#auto-linking-for-clickable-toc)
         - [Lowercase only ASCII characters in auto link ids](#lowercase-only-ascii-characters-in-auto-link-ids)
+        - [Preserve case](#preserve-case)
         - [Manipulation of auto link ids](#manipulation-of-auto-link-ids)
         - [URI encoding](#uri-encoding)
         - [Markdown Preview compatible](#markdown-preview-compatible)
     - [Control of depth listed in TOC](#control-of-depth-listed-in-toc)
     - [Ordered or unordered style for TOC elements](#ordered-or-unordered-style-for-toc-elements)
     - [Customizable list bullets in TOC](#customizable-list-bullets-in-toc)
     - [Specify custom indentation prefix](#specify-custom-indentation-prefix)
+    - [Preserve images in headings](#preserve-images-in-headings)
 - [Usage](#usage)
 - [Tips](#tips)
     - [How to remove anchors added by MarkdownTOC](#how-to-remove-anchors-added-by-markdowntoc)
@@ -335,6 +337,22 @@ You can expand the lowercasing capability by setting the `lowecase_only_ascii` a
 # ПРИМЕР EXAMPLE
 ```
 
+#### Preserve case
+
+You can disable the lowercasing capability by setting the `lowercase` attribute to `false`.
+
+```markdown
+<!-- MarkdownTOC autolink="true" lowercase="false" -->
+
+- [One Two Three][One-Two-Three]
+
+<!-- /MarkdownTOC -->
+
+# One Two Three
+```
+
+You can also specify this in your [configuration](#configuration) with key `default_lowercase`.
+
 #### Manipulation of auto link ids
 
 You can manipulate your link ids in your [configuration](#configuration) using the key `id_replacements`.
@@ -616,6 +634,34 @@ Please note that the default for the [attribute](#attributes) is: `'\t'`.
 
 You can set your default indentation in your [configuration](#configuration) with the key `default_indent`.
 
+### Preserve images in headings
+
+If you want to preserve images in headings, set `remove_image` to `false`.
+
+```
+<!-- MarkdownTOC remove_image="false" -->
+
+- ![check](check.png) Everything is OK
+
+<!-- /MarkdownTOC -->
+
+# ![check](check.png) Everything is OK
+```
+
+Please note that the default for the [attribute](#attributes) is: `true`.
+
+```
+<!-- MarkdownTOC -->
+
+- Everything is OK
+
+<!-- /MarkdownTOC -->
+
+# ![check](check.png) Everything is OK
+```
+
+You can change your default setting in your [configuration](#configuration) with the key `default_remove_image`.
+
 ## Usage
 
 1. Open your [Markdown] file
@@ -697,7 +743,9 @@ The following attributes can be used to control the generation of the TOC.
 | `depth`                | integer (`0` means _no limit_) | `2`           | `default_depth`                |
 | `indent`               | string                         | `\t`          | `default_indent`               |
 | `list_bullets`         | string                         | `-`           | `default_list_bullets`         |
+| `lowercase`            | `true`or`false`                | `true`        | `default_lowercase`            |
 | `lowercase_only_ascii` | `true`or`false`                | `true`        | `default_lowercase_only_ascii` |
+| `remove_image`         | `true`or`false`                | `true`        | `default_remove_image`         |
 | `style`                | `ordered` or `unordered`       | `unordered`   | `default_style`                |
 | `uri_encoding`         | `true`or`false`                | `true`        | `default_uri_encoding`         |
 | `markdown_preview`     | `false`or`github`or`markdown`  | `false`       | `default_markdown_preview`     |
@@ -743,7 +791,9 @@ Example: `MarkdownTOC.sublime-settings`
   "default_depth": 2,
   "default_indent": "\t",
   "default_list_bullets": "-",
+  "default_lowercase": true,
   "default_lowercase_only_ascii": true,
+  "default_remove_image": true,
   "default_style": "unordered",
   "default_uri_encoding": true,
   "default_markdown_preview": false,
@@ -769,7 +819,10 @@ For an overview of the specific behaviour behind an attribute, please refer to t
 - `default_bracket`, (see: [Auto linking for _clickable_ TOC](#auto-linking-for-clickable-toc))
 - `default_depth`, (see: [Control of depth listed in TOC](#control-of-depth-listed-in-toc))
 - `default_indent`, (see: [Specify custom indentation prefix](#specify-custom-indentation-prefix))
+- `default_list_bullets`, (see: [Customizable list bullets in TOC](#customizable-list-bullets-in-toc))
+- `default_lowercase`, (see: [Preserve case](#preserve-case))
 - `default_lowercase_only_ascii`, (see: [Lowercase only ASCII characters in auto link ids](#lowercase-only-ascii-characters-in-auto-link-ids))
+- `default_remove_image`, (see: [Preserve images in headings](#preserve-images-in-headings))
 - `default_style`, (see: [Ordered or unordered style for TOC elements](#ordered-or-unordered-style-for-toc-elements))
 - `default_uri_encoding`, (see: [URI encoding](#uri-encoding))
 - `default_markdown_preview`, (see: [Markdown Preview compatible](#markdown-preview-compatible))
@@ -783,6 +836,7 @@ A configuration for writing Markdown primaily for use on [Github] _could_ look l
 {
   "default_autolink": true,
   "default_bracket": "round",
+  "default_lowercase": true,
   "default_lowercase_only_ascii": true
 }
 ```

diff --git a/messages/2.6.0.txt b/messages/2.6.0.txt
@@ -0,0 +1,6 @@
+MarkdownTOC - 2.6.0
+
+CHANGES
+
+- Add `remove_image` parameter. Ref: #43
+- Add `lowercase` parameter. Ref: #40
diff --git a/tests/autoanchor.py b/tests/autoanchor.py
@@ -0,0 +1,41 @@
+# coding:utf-8
+from base import TestBase
+import sublime
+import sys
+
class TestAutoanchor(TestBase):
    """Test of attributes 'autoanchor'"""

    # for debug
    # def tearDown(self):
    #     pass

    # Document template; ``{0}`` is filled with extra attributes for the
    # MarkdownTOC opening tag (or an empty string for the default case).
    autoanchor_text = \
"""

<!-- MarkdownTOC autolink=true {0} -->

<!-- /MarkdownTOC -->

# Changelog
# Glossary
# API Specification
"""

    # NOTE: this method used to be named ``test_autoanchor_false`` as well,
    # so the later definition of that name replaced it in the class
    # namespace and the default case was never executed.
    def test_autoanchor_default(self):
        """Default Auto Anchor is false"""
        body_txt = self.commonSetupAndUpdateGetBody(self.autoanchor_text.format(''))
        self.assert_NotIn('<a name="changelog"></a>', body_txt)
        self.assert_NotIn('<a name="glossary"></a>', body_txt)
        self.assert_NotIn('<a name="api-specification"></a>', body_txt)

    def test_autoanchor_true(self):
        """With autoanchor=true an anchor line precedes each heading"""
        body_txt = self.commonSetupAndUpdateGetBody(self.autoanchor_text.format('autoanchor=true'))
        self.assert_In('<a name="changelog"></a>\n# Changelog', body_txt)
        self.assert_In('<a name="glossary"></a>\n# Glossary', body_txt)
        self.assert_In('<a name="api-specification"></a>\n# API Specification', body_txt)

    def test_autoanchor_false(self):
        """With autoanchor=false no anchors appear in the body"""
        body_txt = self.commonSetupAndUpdateGetBody(self.autoanchor_text.format('autoanchor=false'))
        self.assert_NotIn('<a name="changelog"></a>', body_txt)
        self.assert_NotIn('<a name="glossary"></a>', body_txt)
        self.assert_NotIn('<a name="api-specification"></a>', body_txt)