Skip to content

Commit

Permalink
Merge pull request #37 from krassowski/plain-text
Browse files Browse the repository at this point in the history
Add plain text and cPython docstring support
  • Loading branch information
krassowski committed Feb 19, 2024
2 parents e43172e + 08286b6 commit 4e8f011
Show file tree
Hide file tree
Showing 8 changed files with 282 additions and 2 deletions.
11 changes: 10 additions & 1 deletion docstring_to_markdown/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from .cpython import cpython_to_markdown
from .google import google_to_markdown, looks_like_google
from .plain import looks_like_plain_text, plain_text_to_markdown
from .rst import looks_like_rst, rst_to_markdown

__version__ = "0.13"
__version__ = "0.14"


class UnknownFormatError(Exception):
Expand All @@ -15,4 +17,11 @@ def convert(docstring: str) -> str:
if looks_like_google(docstring):
return google_to_markdown(docstring)

if looks_like_plain_text(docstring):
return plain_text_to_markdown(docstring)

cpython = cpython_to_markdown(docstring)
if cpython:
return cpython

raise UnknownFormatError()
5 changes: 5 additions & 0 deletions docstring_to_markdown/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from re import sub


def escape_markdown(text: str) -> str:
    """Return ``text`` with Markdown control characters backslash-escaped.

    The escaped characters are the backslash itself, ``#``, ``*``, ``_``,
    ``[`` and ``]``; every other character is passed through unchanged.
    """
    # Character class of everything to escape, captured as group 1.
    special_chars = r'([\\#*_[\]])'
    # A literal backslash followed by the captured character.
    escaped_form = r'\\\1'
    return sub(special_chars, escaped_form, text)
37 changes: 37 additions & 0 deletions docstring_to_markdown/cpython.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from typing import Union, List
from re import fullmatch

from ._utils import escape_markdown

def _is_cpython_signature_line(line: str) -> bool:
    """Tell whether ``line`` is shaped like a CPython builtin signature.

    CPython docstrings open with signature lines in the following format:
        str(bytes_or_buffer[, encoding[, errors]]) -> str
    The arrow may be spelled ``->`` or ``-->`` and must be followed by a
    non-empty description of the return value.
    """
    signature_pattern = r'\w+\(\S*(, \S+)*(\[, \S+\])*\)\s--?>\s.+'
    matched = fullmatch(signature_pattern, line)
    return matched is not None


def cpython_to_markdown(text: str) -> Union[str, None]:
    """Convert a CPython-style docstring to Markdown.

    The leading signature lines, together with the space-indented lines
    that continue them, are wrapped in a fenced code block. Everything from
    the first non-signature, non-indented line onwards is treated as prose
    and appended after the fence with Markdown characters escaped.

    Args:
        text: The raw docstring.

    Returns:
        The Markdown rendering, or ``None`` when ``text`` does not begin
        with a CPython signature line (which includes empty input).
    """
    signature_lines: List[str] = []
    other_lines: List[str] = []
    for line in text.splitlines():
        if other_lines:
            # Once the prose has started every remaining line belongs to it.
            # Letting indented lines rejoin the signature block would hoist
            # them above the prose they follow and reorder the docstring.
            other_lines.append(line)
        elif _is_cpython_signature_line(line):
            signature_lines.append(line)
        elif not signature_lines:
            # The very first line is not a signature: not CPython-style.
            return None
        elif line.startswith(' '):
            # Indented continuation of the preceding signature line.
            signature_lines.append(line)
        else:
            other_lines.append(line)
    if not signature_lines:
        # Nothing was iterated (empty docstring); without this guard the
        # function would report success with an empty code fence.
        return None
    return '\n'.join([
        '```',
        '\n'.join(signature_lines),
        '```',
        escape_markdown('\n'.join(other_lines))
    ])

def looks_like_cpython(text: str) -> bool:
    """Report whether ``text`` can be converted as a CPython-style docstring."""
    converted = cpython_to_markdown(text)
    return converted is not None


# Names exported when this module is star-imported.
__all__ = ['looks_like_cpython', 'cpython_to_markdown']
27 changes: 27 additions & 0 deletions docstring_to_markdown/plain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from re import fullmatch
from ._utils import escape_markdown


def looks_like_plain_text(value: str) -> bool:
    """Report whether ``value`` reads as plain English prose needing no escaping.

    Accepts:
    - word characters and whitespace
    - a full stop, bang or question mark directly after a word character,
      if followed by whitespace or the end of the string
    - a comma, colon or semicolon directly after a word character and
      followed by whitespace
    - a dash between two word characters (like in `e-mail`)
    - double and single quotes if preceded by whitespace, or if preceded by
      a word character and followed by whitespace (or the end of the
      string); a single quote is also allowed between two word characters
    - parentheses if the opening one is preceded by whitespace and the
      closing one is followed by whitespace or the end of the string

    Does not accept:
    - underscores anywhere in the text
    - square brackets (used in markdown a lot) or any other character that
      is not covered by the rules above
    """
    plain_text_pattern = (
        r"((\w[\.!\?\)'\"](\s|$))|(\w[,:;]\s)|(\w[-']\w)|(\w\s['\"\(])|\w|\s)+"
    )
    # The underscore is a word character for the regex, so it is ruled out
    # separately before the pattern is tried.
    return '_' not in value and fullmatch(plain_text_pattern, value) is not None


def plain_text_to_markdown(text: str) -> str:
    """Return plain ``text`` as Markdown by escaping its special characters."""
    escaped_text = escape_markdown(text)
    return escaped_text

# Names exported when this module is star-imported.
__all__ = ['looks_like_plain_text', 'plain_text_to_markdown']
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ docstring-to-markdown = py.typed
addopts =
--pyargs tests
--cov docstring_to_markdown
--cov-fail-under=98
--cov-fail-under=99
--cov-report term-missing:skip-covered
-p no:warnings
--flake8
Expand Down
57 changes: 57 additions & 0 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from docstring_to_markdown import convert, UnknownFormatError
import pytest

# CPython-style docstring: one signature line followed by a description.
CPYTHON = """\
bool(x) -> bool
Returns True when the argument x is true, False otherwise.\
"""


# Markdown that ``convert`` is expected to produce for CPYTHON.
CPYTHON_MD = """\
```
bool(x) -> bool
```
Returns True when the argument x is true, False otherwise.\
"""

# Google-style docstring with an ``Args:`` section.
# NOTE(review): the argument lines appear without leading indentation here;
# presumably it was lost when this page was rendered - confirm against the repo.
GOOGLE = """Do **something**.
Args:
a: some arg
b: some arg
"""

# Markdown that ``convert`` is expected to produce for GOOGLE.
GOOGLE_MD = """Do **something**.
#### Args
- `a`: some arg
- `b`: some arg
"""


# reStructuredText input containing an inline hyperlink, and the Markdown
# that ``convert`` is expected to produce for it.
RST = "Please see `this link<https://example.com>`__."
RST_MD = "Please see [this link](https://example.com)."


def test_convert_cpython():
    """``convert`` renders the CPython fixture as its expected Markdown."""
    rendered = convert(CPYTHON)
    assert rendered == CPYTHON_MD


def test_convert_plain_text():
    """``convert`` returns a plain English sentence unchanged."""
    rendered = convert('This is a sentence.')
    assert rendered == 'This is a sentence.'


def test_convert_google():
    """``convert`` renders the Google-style fixture as its expected Markdown."""
    rendered = convert(GOOGLE)
    assert rendered == GOOGLE_MD


def test_convert_rst():
    """``convert`` renders the reStructuredText fixture as its expected Markdown."""
    rendered = convert(RST)
    assert rendered == RST_MD


def test_unknown_format():
    """``convert`` raises ``UnknownFormatError`` when no format matches."""
    unrecognised = 'ARGS [arg1, arg2] RETURNS: str OR None'
    with pytest.raises(UnknownFormatError):
        convert(unrecognised)
103 changes: 103 additions & 0 deletions tests/test_cpython.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import pytest
from docstring_to_markdown.cpython import looks_like_cpython, cpython_to_markdown

# Single-signature CPython docstring.
BOOL = """\
bool(x) -> bool
Returns True when the argument x is true, False otherwise.\
"""

# Markdown that ``cpython_to_markdown`` is expected to produce for BOOL.
BOOL_MD = """\
```
bool(x) -> bool
```
Returns True when the argument x is true, False otherwise.\
"""

# Several consecutive signature lines followed by a bulleted description.
BYTES = """\
bytes(iterable_of_ints) -> bytes
bytes(string, encoding[, errors]) -> bytes
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
bytes(int) -> bytes object of size given by the parameter initialized with null bytes
bytes() -> empty bytes object
Construct an immutable array of bytes from:
- an iterable yielding integers in range(256)
- a text string encoded using the specified encoding
- any object implementing the buffer API.
- an integer\
"""

# Signature written with the long ``-->`` arrow and nested optional arguments.
COLLECTIONS_DEQUEUE = """\
deque([iterable[, maxlen]]) --> deque object
A list-like sequence optimized for data accesses near its endpoints.\
"""

# Signatures whose return descriptions spill over onto following lines.
# NOTE(review): the continuation lines appear without leading indentation
# here; presumably it was lost when this page was rendered - confirm against
# the repo.
DICT = """\
dict() -> new empty dictionary
dict(mapping) -> new dictionary initialized from a mapping object's
(key, value) pairs
dict(iterable) -> new dictionary initialized as if via:
d = {}
for k, v in iterable:
d[k] = v
dict(**kwargs) -> new dictionary initialized with the name=value pairs
in the keyword argument list. For example: dict(one=1, two=2)\
"""

# Two signature lines followed by prose that contains underscores.
STR = """\
str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or
errors is specified, then the object must expose a data buffer
that will be decoded using the given encoding and error handler.
Otherwise, returns the result of object.__str__() (if defined)
or repr(object).\
"""

# Markdown that ``cpython_to_markdown`` is expected to produce for STR; the
# underscores in the prose are backslash-escaped.
STR_MD = """\
```
str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str
```
Create a new string object from the given object. If encoding or
errors is specified, then the object must expose a data buffer
that will be decoded using the given encoding and error handler.
Otherwise, returns the result of object.\\_\\_str\\_\\_() (if defined)
or repr(object).\
"""


@pytest.mark.parametrize(
    "text",
    [BYTES, STR, DICT, BOOL, COLLECTIONS_DEQUEUE],
)
def test_accepts_cpython_docstrings(text):
    """Every CPython docstring fixture is recognised as CPython-style."""
    recognised = looks_like_cpython(text)
    assert recognised is True


@pytest.mark.parametrize(
    "text",
    [
        "[link label](https://link)",
        "![image label](https://source)",
        "Some **bold** text",
        "More __bold__ text",
        "Some *italic* text",
        "More _italic_ text",
        "This is a sentence.",
        "Exclamation!",
        "Can I ask a question?",
        "Let's send an e-mail",
        "Parentheses (are) fine (really)",
        "Double \"quotes\" and single 'quotes'",
    ],
)
def test_rejects_markdown_and_plain_text(text):
    """Markdown snippets and plain sentences are not taken for CPython docstrings."""
    recognised = looks_like_cpython(text)
    assert recognised is False


def test_conversion_bool():
    """The BOOL fixture converts to its expected Markdown."""
    rendered = cpython_to_markdown(BOOL)
    assert rendered == BOOL_MD


def test_conversion_str():
    """The STR fixture converts to its expected Markdown."""
    rendered = cpython_to_markdown(STR)
    assert rendered == STR_MD
42 changes: 42 additions & 0 deletions tests/test_plain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest
from docstring_to_markdown.plain import looks_like_plain_text, plain_text_to_markdown


@pytest.mark.parametrize(
    "text",
    [
        "This is a sentence.",
        "Exclamation!",
        "Can I ask a question?",
        "Let's send an e-mail",
        "Parentheses (are) fine (really)",
        "Double \"quotes\" and single 'quotes'",
    ],
)
def test_accepts_english(text):
    """Ordinary English sentences are recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is True


@pytest.mark.parametrize(
    "text",
    [
        "[link label](https://link)",
        "![image label](https://source)",
        "Some **bold** text",
        "More __bold__ text",
        "Some *italic* text",
        "More _italic_ text",
    ],
)
def test_rejects_markdown(text):
    """Text carrying Markdown markup is not recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is False


@pytest.mark.parametrize(
    "text",
    [
        "def test():",
        "print(123)",
        "func(arg)",
        "2 + 2",
        "var['test']",
        "x = 'test'",
    ],
)
def test_rejects_code(text):
    """Code-like snippets are not recognised as plain text."""
    verdict = looks_like_plain_text(text)
    assert verdict is False


def test_conversion():
    """Text without special characters passes through the conversion unchanged."""
    rendered = plain_text_to_markdown("test")
    assert rendered == "test"

0 comments on commit 4e8f011

Please sign in to comment.