Skip to content

Commit

Permalink
Fixed os linebreaks
Browse files Browse the repository at this point in the history
  • Loading branch information
malteos committed Sep 18, 2018
1 parent 12ad82a commit d6dbb28
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 60 deletions.
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,37 @@

[![Build Status](https://travis-ci.org/openlegaldata/legal-md.svg?branch=master)](https://travis-ci.org/openlegaldata/legal-md) [![Coverage Status](https://coveralls.io/repos/github/openlegaldata/legal-md/badge.svg?branch=master)](https://coveralls.io/github/openlegaldata/legal-md?branch=master)

*!!! THIS EXTENSION IS STILL WORK-IN-PROGRESS !!!*

WIP
Extension for [Python-Markdown](https://python-markdown.github.io/). Supported features:

Install locally:
- Line numbers: Convert paragraph numbers into HTML tables
- Special links: Semantic annotation and citation markers


## Install

```
pip install git+https://github.com/openlegaldata/legal-md.git#egg=legal-md
# Install locally in editable mode (for development)
pip install -e /var/www/apps/oldp/app/
```

## Usage

```python
import markdown

md_str = '# Title\n'
md_str += '1| Paragraph with line number\n'

html = markdown.markdown(md_str, extensions=[
'legal_md.extensions.line_numbers',
])

```

## License

Not licensed yet.
75 changes: 18 additions & 57 deletions legal_md/extensions/line_numbers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import re

from bs4 import BeautifulSoup
Expand All @@ -13,6 +14,8 @@
PLACEHOLDER_BEGIN = '{{{{{'
PLACEHOLDER_END = '}}}}}'

logger = logging.getLogger(__name__)


class LineNumberExtension(Extension):
"""Line number extension.
Expand Down Expand Up @@ -85,7 +88,10 @@ def run(self, text):
out_lines = []
last_with_line_no = False
last_line_no = 0
lines = text.split('\n')
lines = text.splitlines()

# print('Post lines: %s' % lines)

for i, line in enumerate(lines):
is_last_line = i == len(lines) - 1

Expand Down Expand Up @@ -152,7 +158,11 @@ def run(self, text):

out_lines.append(line)

return '\n'.join(out_lines)
out_str = '\n'.join(out_lines)

# print('Output lines: %s' % out_str)

return out_str


class LineNumberPreprocessor(Preprocessor):
Expand All @@ -168,74 +178,25 @@ def run(self, lines):
ln_marker_close = PLACEHOLDER_BEGIN + '/' + PLACEHOLDER_END
ln_with_marker = [] # store index of lines with number

# print('Input lines: %s' % lines)

for i, line in enumerate(lines):
match = re.search(pattern, line)

if match: # If line marker found
line_content = match.group(2)

# Replace line breaks regardless of the OS
line_content = line_content.replace('\r', '').replace('\n', '')

ln_marker = PLACEHOLDER_BEGIN + match.group(1) + PLACEHOLDER_END

# Write marker at the end of line
lines[i] = line_content + ln_marker + ln_marker_close
ln_with_marker.append(i)

# print('\n'.join(lines) + '\n-------------------')

return lines

def _run(self, lines):
    """Replace line number marks with placeholder markers.

    A line is treated as numbered when it starts with digits, ``#`` or
    ``:`` followed by ``|`` and one whitespace character. Such a line is
    rewritten to its content followed by the opening placeholder that
    carries the number. The closing placeholder is appended to the last
    line of the paragraph, i.e. the first line (starting with the numbered
    one) that is either the final line of the input or is followed by a
    whitespace-only line.

    :param lines: list of input lines (strings)
    :return: new list of processed lines; the input list is not modified
    """
    pattern = re.compile(r'^([0-9]+|#|:)\|\s(.*)$')
    close_marker = PLACEHOLDER_BEGIN + '/' + PLACEHOLDER_END
    last_index = len(lines) - 1

    out_lines = []
    marker_open = False  # True while a numbered paragraph awaits its closing marker

    for i, line in enumerate(lines):
        match = pattern.search(line)
        if match:
            # Move the number from the line start into a marker at the line end
            line = match.group(2) + PLACEHOLDER_BEGIN + match.group(1) + PLACEHOLDER_END
            marker_open = True

        # Close the paragraph on its last line: end of input or next line is blank
        if marker_open and (i == last_index or lines[i + 1].strip() == ''):
            line += close_marker
            marker_open = False

        out_lines.append(line)

    return out_lines


def makeExtension(**kwargs):  # pragma: no cover
    """Build the line number extension, forwarding all keyword arguments to it."""
    extension = LineNumberExtension(**kwargs)
    return extension
5 changes: 4 additions & 1 deletion legal_md/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ class MarkdownExtensionTest(TestCase):
]
RESOURCE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')

def convert_md(self, md_content):
    """Convert a markdown string to HTML using the extensions set on this test case."""
    html = markdown.markdown(md_content, extensions=self.extensions)
    return html

def assert_md_file(self, resource_name, msg=None):
    """Compare resource markdown file with corresponding html file.

    Reads ``<resource_name>.md`` and ``<resource_name>.html`` from
    ``RESOURCE_DIR``, converts the markdown with ``convert_md`` and asserts
    that the result equals the stripped content of the html file.

    :param resource_name: base name of the resource files (without extension)
    :param msg: optional failure message passed on to ``assertEqual``
    """
    md_path = os.path.join(self.RESOURCE_DIR, resource_name + '.md')
    html_path = os.path.join(self.RESOURCE_DIR, resource_name + '.html')

    with open(md_path) as md_file, open(html_path) as html_file:
        md_content = md_file.read()
        expected = html_file.read().strip()

    # Convert exactly once, through the overridable helper
    actual = self.convert_md(md_content)

    self.assertEqual(expected, actual, msg)
8 changes: 8 additions & 0 deletions legal_md/tests/resources/os_line_breaks.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<table class="table table-lines">
<tr class="line" data-line="1"><td class="line-number"><a href="#L1">1</a></td><td class="line-content"><p>
ab
</p></td></tr>
<tr class="line" data-line="2"><td class="line-number"><a href="#L2">2</a></td><td class="line-content"><p>
cd
</p></td></tr>
</table>
7 changes: 7 additions & 0 deletions legal_md/tests/test_line_numbers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

from legal_md.tests import MarkdownExtensionTest


Expand All @@ -16,6 +18,11 @@ def test_mixed(self):
def test_case_text(self):
    """Check the 'case_text' markdown resource against its html counterpart."""
    resource_name = 'case_text'
    self.assert_md_file(resource_name)

def test_os_line_breaks(self):
    r"""Test that OS-specific line breaks (e.g. \r\n) are converted correctly.

    The docstring is raw so that the escape sequences above stay literal
    text instead of embedding control characters.
    """
    html_path = os.path.join(self.RESOURCE_DIR, 'os_line_breaks.html')
    with open(html_path) as html_file:
        expected = html_file.read().strip()

    actual = self.convert_md('1| ab\r\n\r\n2| cd')

    # Same (expected, actual) order as assert_md_file, so failure diffs read consistently
    self.assertEqual(expected, actual)

def test_lines_with_single_line_breaks(self):
self.assert_md_file('lines_with_single_line_breaks')

Expand Down

0 comments on commit d6dbb28

Please sign in to comment.