diff --git a/src/blurb/_blurb_file.py b/src/blurb/_blurb_file.py index fbd1cf5..d0c0df4 100644 --- a/src/blurb/_blurb_file.py +++ b/src/blurb/_blurb_file.py @@ -83,7 +83,8 @@ import re from blurb._template import sanitize_section, sections, unsanitize_section -from blurb.blurb import BlurbError, textwrap_body, sortable_datetime, nonceify +from blurb._utils.text import textwrap_body +from blurb.blurb import BlurbError, sortable_datetime, nonceify root = None # Set by chdir_to_repo_root() lowest_possible_gh_issue_number = 32426 diff --git a/src/blurb/_merge.py b/src/blurb/_merge.py index ab26a3e..aa9aaee 100644 --- a/src/blurb/_merge.py +++ b/src/blurb/_merge.py @@ -4,8 +4,9 @@ from blurb._blurb_file import Blurbs from blurb._cli import require_ok, subcommand +from blurb._utils.text import textwrap_body from blurb._versions import glob_versions, printable_version -from blurb.blurb import glob_blurbs, textwrap_body +from blurb.blurb import glob_blurbs original_dir: str = os.getcwd() diff --git a/src/blurb/_utils/__init__.py b/src/blurb/_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/blurb/_utils/text.py b/src/blurb/_utils/text.py new file mode 100644 index 0000000..b5b7d02 --- /dev/null +++ b/src/blurb/_utils/text.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import itertools +import textwrap + +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Iterable + + +def textwrap_body(body: str | Iterable[str], *, subsequent_indent: str = '') -> str: + """Wrap body text. + + Accepts either a string or an iterable of strings. + (Iterable is assumed to be individual lines.) + Returns a string. 
+ """ + if isinstance(body, str): + text = body + else: + text = '\n'.join(body).rstrip() + + # textwrap merges paragraphs, ARGH + + # step 1: remove trailing whitespace from individual lines + # (this means that empty lines will just have \n, no invisible whitespace) + lines = [] + for line in text.split('\n'): + lines.append(line.rstrip()) + text = '\n'.join(lines) + # step 2: break into paragraphs and wrap those + paragraphs = text.split('\n\n') + paragraphs2 = [] + kwargs: dict[str, object] = {'break_long_words': False, 'break_on_hyphens': False} + if subsequent_indent: + kwargs['subsequent_indent'] = subsequent_indent + dont_reflow = False + for paragraph in paragraphs: + # don't reflow bulleted / numbered lists + dont_reflow = dont_reflow or paragraph.startswith(('* ', '1. ', '#. ')) + if dont_reflow: + initial = kwargs.get('initial_indent', '') + subsequent = kwargs.get('subsequent_indent', '') + if initial or subsequent: + lines = [line.rstrip() for line in paragraph.split('\n')] + indents = itertools.chain( + itertools.repeat(initial, 1), + itertools.repeat(subsequent), + ) + lines = [indent + line for indent, line in zip(indents, lines)] + paragraph = '\n'.join(lines) + paragraphs2.append(paragraph) + else: + # Why do we reflow the text twice? Because it can actually change + # between the first and second reflows, and we want the text to + # be stable. The problem is that textwrap.wrap is deliberately + # dumb about how many spaces follow a period in prose. + # + # We're reflowing at 76 columns, but let's pretend it's 30 for + # illustration purposes. If we give textwrap.wrap the following + # text--ignore the line of 30 dashes, that's just to help you + # with visualization: + # + # ------------------------------ + # xxxx xxxx xxxx xxxx xxxx. xxxx + # + # The first textwrap.wrap will return this: + # 'xxxx xxxx xxxx xxxx xxxx.\nxxxx' + # + # If we reflow it again, textwrap will rejoin the lines, but + # only with one space after the period! 
So this time it'll + # all fit on one line, behold: + # ------------------------------ + # xxxx xxxx xxxx xxxx xxxx. xxxx + # and so it now returns: + # 'xxxx xxxx xxxx xxxx xxxx. xxxx' + # + # textwrap.wrap supports trying to add two spaces after a peroid: + # https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings + # But it doesn't work all that well, because it's not smart enough + # to do a really good job. + # + # Since blurbs are eventually turned into reST and rendered anyway, + # and since the Zen says 'In the face of ambiguity, refuse the + # temptation to guess', I don't sweat it. I run textwrap.wrap + # twice, so it's stable, and this means occasionally it'll + # convert two spaces to one space, no big deal. + + paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() + paragraph = '\n'.join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() + paragraphs2.append(paragraph) + # don't reflow literal code blocks (I hope) + dont_reflow = paragraph.endswith('::') + if subsequent_indent: + kwargs['initial_indent'] = subsequent_indent + text = '\n\n'.join(paragraphs2).rstrip() + if not text.endswith('\n'): + text += '\n' + return text diff --git a/src/blurb/blurb.py b/src/blurb/blurb.py index 4e0082b..4357bd9 100755 --- a/src/blurb/blurb.py +++ b/src/blurb/blurb.py @@ -42,106 +42,15 @@ import base64 import glob import hashlib -import itertools import os import sys -import textwrap import time from blurb._template import ( next_filename_unsanitize_sections, sanitize_section, - sanitize_section_legacy, sections, unsanitize_section, + sanitize_section_legacy, sections, ) -def textwrap_body(body, *, subsequent_indent=''): - """ - Accepts either a string or an iterable of strings. - (Iterable is assumed to be individual lines.) - Returns a string. 
- """ - if isinstance(body, str): - text = body - else: - text = "\n".join(body).rstrip() - - # textwrap merges paragraphs, ARGH - - # step 1: remove trailing whitespace from individual lines - # (this means that empty lines will just have \n, no invisible whitespace) - lines = [] - for line in text.split("\n"): - lines.append(line.rstrip()) - text = "\n".join(lines) - # step 2: break into paragraphs and wrap those - paragraphs = text.split("\n\n") - paragraphs2 = [] - kwargs = {'break_long_words': False, 'break_on_hyphens': False} - if subsequent_indent: - kwargs['subsequent_indent'] = subsequent_indent - dont_reflow = False - for paragraph in paragraphs: - # don't reflow bulleted / numbered lists - dont_reflow = dont_reflow or paragraph.startswith(("* ", "1. ", "#. ")) - if dont_reflow: - initial = kwargs.get("initial_indent", "") - subsequent = kwargs.get("subsequent_indent", "") - if initial or subsequent: - lines = [line.rstrip() for line in paragraph.split("\n")] - indents = itertools.chain( - itertools.repeat(initial, 1), - itertools.repeat(subsequent), - ) - lines = [indent + line for indent, line in zip(indents, lines)] - paragraph = "\n".join(lines) - paragraphs2.append(paragraph) - else: - # Why do we reflow the text twice? Because it can actually change - # between the first and second reflows, and we want the text to - # be stable. The problem is that textwrap.wrap is deliberately - # dumb about how many spaces follow a period in prose. - # - # We're reflowing at 76 columns, but let's pretend it's 30 for - # illustration purposes. If we give textwrap.wrap the following - # text--ignore the line of 30 dashes, that's just to help you - # with visualization: - # - # ------------------------------ - # xxxx xxxx xxxx xxxx xxxx. xxxx - # - # The first textwrap.wrap will return this: - # "xxxx xxxx xxxx xxxx xxxx.\nxxxx" - # - # If we reflow it again, textwrap will rejoin the lines, but - # only with one space after the period! 
So this time it'll - # all fit on one line, behold: - # ------------------------------ - # xxxx xxxx xxxx xxxx xxxx. xxxx - # and so it now returns: - # "xxxx xxxx xxxx xxxx xxxx. xxxx" - # - # textwrap.wrap supports trying to add two spaces after a peroid: - # https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper.fix_sentence_endings - # But it doesn't work all that well, because it's not smart enough - # to do a really good job. - # - # Since blurbs are eventually turned into ReST and rendered anyway, - # and since the Zen says "In the face of ambiguity, refuse the - # temptation to guess", I don't sweat it. I run textwrap.wrap - # twice, so it's stable, and this means occasionally it'll - # convert two spaces to one space, no big deal. - - paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() - paragraph = "\n".join(textwrap.wrap(paragraph.strip(), width=76, **kwargs)).rstrip() - paragraphs2.append(paragraph) - # don't reflow literal code blocks (I hope) - dont_reflow = paragraph.endswith("::") - if subsequent_indent: - kwargs['initial_indent'] = subsequent_indent - text = "\n\n".join(paragraphs2).rstrip() - if not text.endswith("\n"): - text += "\n" - return text - def sortable_datetime(): return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) diff --git a/tests/test_blurb.py b/tests/test_blurb.py index 7d23b8a..4f11726 100644 --- a/tests/test_blurb.py +++ b/tests/test_blurb.py @@ -4,49 +4,6 @@ from blurb import blurb -@pytest.mark.parametrize( - "body, subsequent_indent, expected", - ( - ( - "This is a test of the textwrap_body function with a string. It should wrap the text to 79 characters.", - "", - "This is a test of the textwrap_body function with a string. 
# NOTE: textwrap_body() reflows at 76 columns by default, so the sample prose
# says "76 characters"; the expected strings below are only correct at that
# width.
@pytest.mark.parametrize(
    "body, subsequent_indent, expected",
    (
        pytest.param(
            "This is a test of the textwrap_body function with a string. It should wrap the text to 76 characters.",
            "",
            "This is a test of the textwrap_body function with a string. It should wrap\n"
            "the text to 76 characters.\n",
            id="string",
        ),
        pytest.param(
            [
                "This is a test of the textwrap_body function",
                "with an iterable of strings.",
                "It should wrap the text to 76 characters.",
            ],
            "",
            "This is a test of the textwrap_body function with an iterable of strings. It\n"
            "should wrap the text to 76 characters.\n",
            id="iterable-of-lines",
        ),
        pytest.param(
            "This is a test of the textwrap_body function with a string and subsequent indent.",
            " ",
            "This is a test of the textwrap_body function with a string and subsequent\n"
            " indent.\n",
            id="subsequent-indent",
        ),
        pytest.param(
            "This is a test of the textwrap_body function with a bullet list and subsequent indent. The list should not be wrapped.\n"
            "\n"
            "* Item 1\n"
            "* Item 2\n",
            " ",
            "This is a test of the textwrap_body function with a bullet list and\n"
            " subsequent indent. The list should not be wrapped.\n"
            "\n"
            " * Item 1\n"
            " * Item 2\n",
            id="bullet-list-not-reflowed",
        ),
    ),
)
def test_textwrap_body(body, subsequent_indent, expected):
    """textwrap_body() wraps prose, applies the indent, and keeps list items on their own lines."""
    assert textwrap_body(body, subsequent_indent=subsequent_indent) == expected