Skip to content
This repository has been archived by the owner on Oct 13, 2021. It is now read-only.

Commit

Permalink
Don't treat vertical tabs or form feeds as line ending characters.
Browse files Browse the repository at this point in the history
This matches the behavior of clang and most text editors.

Closes #631.
  • Loading branch information
James Abbatiello committed Jan 26, 2017
2 parents a4f5f39 + 96f62fa commit debc4e4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
21 changes: 20 additions & 1 deletion dxr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,26 @@ def split_content_lines(unicode):
resulting lines.
"""
return unicode.splitlines(True)
lines = unicode.splitlines(True)
# Vertical Tabs, Form Feeds and some other characters are treated as
# end-of-lines by unicode.splitlines.
# See https://docs.python.org/2/library/stdtypes.html#unicode.splitlines
# Since we don't want those characters to be treated as line endings, we
# take the result and stitch any affected lines back together.

# str.splitlines behaves more like what we want, but encoding to bytes,
# calling str.splitlines and then decoding again is slower than stitching.

# Using a frozenset here is faster than using a tuple.
non_line_endings = frozenset((u"\v", u"\f", u"\x1c", u"\x1d", u"\x1e",
u"\x85", u"\u2028", u"\u2029"))
def unsplit_some_lines(accum, x):
    """Fold step for ``reduce``: merge ``x`` into ``accum`` or append it.

    When the most recent entry of ``accum`` is non-empty and ends with one
    of the characters in ``non_line_endings``, ``x`` is glued onto that
    entry; otherwise ``x`` is added as a new entry.  ``accum`` is modified
    in place and returned so it can serve as the next accumulator.
    """
    previous = accum[-1] if accum else None
    if previous and previous[-1] in non_line_endings:
        # The previous piece was cut at a character we do not want to treat
        # as a line ending, so continue that line instead of starting one.
        accum[-1] += x
        return accum
    accum.append(x)
    return accum
return reduce(unsplit_some_lines, lines, [])


def unicode_for_display(str):
Expand Down
9 changes: 9 additions & 0 deletions tests/test_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,3 +307,12 @@ def test_empty_tag_boundaries(self):
"""
text_to_html_lines('hello!',
regions=[(3, 3, Region('a')), (3, 5, Region('b'))])

def test_unusual_whitespace():
    """Vertical tabs and form feeds must stay inside their line.

    Both are ordinary whitespace rather than line endings, so splitting the
    joined text has to give back exactly the three original lines.
    """
    expected = [
        u"This contains 3 lines\n",
        u"This line has a vertical tab \v and a form feed \f in it\n",
        u"This is the last line\n",
    ]
    text = u''.join(expected)
    eq_(split_content_lines(text), expected)

0 comments on commit debc4e4

Please sign in to comment.